コード例 #1
0
ファイル: mpcm.py プロジェクト: Ar9av/Question-Generation
    def build_model(self):
        """Build the TensorFlow graph of the span-prediction QA model.

        The graph embeds a context and a question, filters the context by its
        relevancy to the question, encodes both with a shared BiLSTM, computes
        multi-perspective cosine matches between every context step and the
        question, aggregates them with a second BiLSTM and projects to start /
        end logits over context positions.
        (Presumably the "multi-perspective context matching" architecture the
        file name ``mpcm`` refers to -- TODO confirm.)

        Reads from ``self``: ``vocab``, ``embedding_size``, ``is_training``
        (boolean tensor used in ``tf.cond``) and ``training_mode`` (Python
        truth value deciding whether an optimizer is built).

        Sets on ``self`` (among intermediate tensors): the placeholders
        ``context_in``, ``question_in`` and ``answer_spans_in``; ``logits_start``
        / ``logits_end``; ``prob_start`` / ``prob_end``; per-example ``nll`` and
        its batch mean ``loss``; ``accuracy``; ``pred_span``; and, when
        ``self.training_mode`` is truthy, ``optimizer``.
        """
        self.dropout_prob = 0.2

        # Added to the cosine-similarity denominators so that zero-norm vectors
        # (e.g. an all-zero padding embedding) give 0 instead of NaN.
        eps = 1e-6
        # Number of matching perspectives, i.e. rows of each W matrix below.
        num_perspectives = 50

        self.context_in = tf.placeholder(tf.int32, [None, None])
        self.question_in = tf.placeholder(tf.int32, [None, None])
        # Sequence lengths are recovered by counting the non-PAD token ids.
        pad_id = self.vocab[loader.PAD]
        self.context_len = tf.reduce_sum(tf.cast(tf.not_equal(self.context_in, pad_id), tf.int32), axis=1)
        self.question_len = tf.reduce_sum(tf.cast(tf.not_equal(self.question_in, pad_id), tf.int32), axis=1)

        # Gold [start, end] positions, one row per example.
        self.answer_spans_in = tf.placeholder(tf.int32, [None, 2])

        with tf.device('/cpu:*'):
            # Load glove embeddings; they are kept frozen (trainable=False).
            glove_embeddings = loader.load_glove(FLAGS.data_path, d=FLAGS.embedding_size)
            embeddings_init = tf.constant(loader.get_embeddings(self.vocab, glove_embeddings, D=FLAGS.embedding_size))
            self.embeddings = tf.get_variable('word_embeddings', initializer=embeddings_init, dtype=tf.float32, trainable=False)
            assert self.embeddings.shape == [len(self.vocab), self.embedding_size]

        # Layer 1: representation layer
        self.context_embedded = tf.layers.dropout(tf.nn.embedding_lookup(self.embeddings, self.context_in), rate=self.dropout_prob, training=self.is_training)
        self.question_embedded = tf.layers.dropout(tf.nn.embedding_lookup(self.embeddings, self.question_in), rate=self.dropout_prob, training=self.is_training)

        # Layer 2: Filter. r is batch x con_len x q_len (cosine similarity of
        # every context embedding with every question embedding).
        context_norm = tf.expand_dims(tf.norm(self.context_embedded, ord=2, axis=2), -1)
        question_norm = tf.expand_dims(tf.norm(self.question_embedded, ord=2, axis=2), -2)
        self.r_norm = context_norm * question_norm
        self.r = tf.matmul(self.context_embedded, tf.transpose(self.question_embedded, [0, 2, 1])) / (self.r_norm + eps)
        self.r_context = tf.reduce_max(self.r, axis=2, keep_dims=True)

        # Each context embedding is scaled by its best match in the question;
        # the question side is passed through unfiltered.
        self.context_filtered = self.r_context * self.context_embedded
        self.question_filtered = self.question_embedded

        # Keep probability shared by every dropout-wrapped cell below:
        # 1 - dropout_prob while training, 1.0 otherwise.
        keep_prob = tf.cond(self.is_training, lambda: 1.0 - self.dropout_prob, lambda: 1.)

        def dropout_lstm(num_units, input_size, input_keep_prob):
            # A BasicLSTMCell wrapped with variational recurrent dropout.
            return tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(num_units=num_units),
                input_keep_prob=input_keep_prob,
                state_keep_prob=keep_prob,
                output_keep_prob=keep_prob,
                input_size=input_size,
                variational_recurrent=True,
                dtype=tf.float32)

        # Layer 3: Context representation (BiLSTM encoder).
        # The same two cell objects are used for the context and the question.
        # NOTE(review): no sequence_length is passed, so the RNNs also run over
        # padded steps -- confirm this is intended.
        num_units_encoder = FLAGS.qa_encoder_units
        with tf.variable_scope('layer3_fwd_cell'):
            cell_fw = dropout_lstm(num_units_encoder, self.embedding_size, keep_prob)
        with tf.variable_scope('layer3_bwd_cell'):
            cell_bw = dropout_lstm(num_units_encoder, self.embedding_size, keep_prob)
        with tf.variable_scope('context_rnn'):
            self.context_encodings, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, self.context_filtered, dtype=tf.float32)
        with tf.variable_scope('q_rnn'):
            self.question_encodings, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, self.question_filtered, dtype=tf.float32)

        # Layer 4: context matching layer
        def similarity(v1, v2, W):  # v1,v2 are batch x seq x d, W is l x d
            # Weight every vector by each of the l perspective rows of W:
            # "btd,ld->btld".
            W_broadcast = tf.expand_dims(tf.expand_dims(W, axis=0), axis=0)
            v1_weighted = tf.expand_dims(v1, 2) * W_broadcast
            v2_weighted = tf.expand_dims(v2, 2) * W_broadcast

            # Pairwise dot products per perspective: "bild,bjld->bijl".
            dots = tf.matmul(tf.transpose(v1_weighted, [0, 2, 1, 3]), tf.transpose(v2_weighted, [0, 2, 3, 1]))
            dots = tf.transpose(dots, [0, 2, 3, 1])

            # Norms shaped to broadcast against dots: b x i x 1 x l and b x 1 x j x l.
            v1_norm = tf.expand_dims(tf.norm(v1_weighted, ord=2, axis=-1), axis=-2)
            v2_norm = tf.expand_dims(tf.norm(v2_weighted, ord=2, axis=-1), axis=-3)

            return dots / (v1_norm * v2_norm + eps)

        # Separate perspective weights per direction and per matching strategy
        # (full / max-pooling / mean-pooling).
        m_fwd = similarity(self.context_encodings[0], self.question_encodings[0], tf.get_variable("W1", (num_perspectives, num_units_encoder), tf.float32))
        m_bwd = similarity(self.context_encodings[1], self.question_encodings[1], tf.get_variable("W2", (num_perspectives, num_units_encoder), tf.float32))
        m_fwd2 = similarity(self.context_encodings[0], self.question_encodings[0], tf.get_variable("W3", (num_perspectives, num_units_encoder), tf.float32))
        m_bwd2 = similarity(self.context_encodings[1], self.question_encodings[1], tf.get_variable("W4", (num_perspectives, num_units_encoder), tf.float32))
        m_fwd3 = similarity(self.context_encodings[0], self.question_encodings[0], tf.get_variable("W5", (num_perspectives, num_units_encoder), tf.float32))
        m_bwd3 = similarity(self.context_encodings[1], self.question_encodings[1], tf.get_variable("W6", (num_perspectives, num_units_encoder), tf.float32))

        def get_last_seq(seq, lengths):  # seq is batch x dim1 x time x dim2
            seq = tf.transpose(seq, [0, 2, 1, 3])  # batch x time x dim1 x dim2
            # Clamp negative indices (an empty question gives length-1 == -1) to 0.
            lengths = tf.maximum(lengths, tf.zeros_like(lengths, dtype=tf.int32))

            batch_size = tf.shape(lengths)[0]
            batch_nums = tf.range(0, limit=batch_size)  # shape (batch_size)
            indices = tf.stack((batch_nums, lengths), axis=1)  # shape (batch_size, 2)
            return tf.gather_nd(seq, indices)  # [batch_size, dim1, dim2]

        # 1.0 on real question tokens, 0.0 on padding; shaped batch x 1 x q_len x 1.
        # The width is taken from the placeholder itself so that it always lines
        # up with the match tensors, even when every question in the batch is
        # padded beyond the longest one.
        question_steps = tf.shape(self.question_in)[1]
        mask = tf.expand_dims(tf.expand_dims(tf.sequence_mask(lengths=self.question_len, maxlen=question_steps, dtype=tf.float32), 1), -1)
        # 0.0 on real tokens, -1.0 on padding. Cosine scores can be negative, so
        # padded steps are pushed to the lowest possible cosine value rather
        # than to 0, which could otherwise win the max.
        pad_fill = mask - 1.0
        question_len_f = tf.expand_dims(tf.expand_dims(tf.cast(self.question_len, tf.float32), -1), -1)

        # Full matching: forward direction against the last real question step,
        # backward direction against step 0.
        m_full_fwd = get_last_seq(m_fwd, self.question_len - 1)
        m_full_bwd = m_bwd[:, :, 0, :]
        m_max_fwd = tf.reduce_max(m_fwd2 * mask + pad_fill, axis=2)
        m_max_bwd = tf.reduce_max(m_bwd2 * mask + pad_fill, axis=2)
        m_mean_fwd = tf.reduce_sum(m_fwd3 * mask, axis=2) / question_len_f
        m_mean_bwd = tf.reduce_sum(m_bwd3 * mask, axis=2) / question_len_f
        self.matches = tf.concat([m_full_fwd, m_full_bwd, m_max_fwd, m_max_bwd, m_mean_fwd, m_mean_bwd], axis=2)

        # Layer 5: aggregate with BiLSTM
        with tf.variable_scope('layer5_fwd_cell'):
            cell_fw2 = dropout_lstm(FLAGS.qa_match_units, num_perspectives * 6, keep_prob)
        with tf.variable_scope('layer5_bwd_cell'):
            # NOTE(review): unlike the forward cell, no input dropout is applied
            # here (input_keep_prob=1.0). Kept as in the original -- confirm
            # whether the asymmetry is deliberate.
            cell_bw2 = dropout_lstm(FLAGS.qa_match_units, num_perspectives * 6, 1.0)
        with tf.variable_scope('match_rnn'):
            self.aggregated_matches, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw2, cell_bw2, self.matches, dtype=tf.float32)
        self.aggregated_matches = tf.concat(self.aggregated_matches, axis=2)

        # Layer 6: Fully connected to get logits (one score per context step).
        self.logits_start = tf.squeeze(tf.layers.dense(self.aggregated_matches, 1, activation=None), -1)
        self.logits_end = tf.squeeze(tf.layers.dense(self.aggregated_matches, 1, activation=None), -1)

        self.prob_start = tf.nn.softmax(self.logits_start)
        self.prob_end = tf.nn.softmax(self.logits_end)

        # training loss: per-example average of the start and end cross
        # entropies (nll), and its mean over the batch (loss).
        start_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.answer_spans_in[:, 0], logits=self.logits_start)
        end_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.answer_spans_in[:, 1], logits=self.logits_end)
        self.nll = start_xent * 0.5 + end_xent * 0.5
        self.loss = tf.reduce_mean(self.nll)

        if self.training_mode:
            # Calculate and clip gradients
            params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, params)
            clipped_gradients, _ = tf.clip_by_global_norm(
                gradients, 5)

            # Optimization
            self.optimizer = tf.train.AdamOptimizer(FLAGS.qa_learning_rate).apply_gradients(
                zip(clipped_gradients, params))

        # Fraction of start/end positions (counted separately) whose argmax
        # matches the gold span.
        argmax_start = tf.argmax(self.prob_start, axis=-1, output_type=tf.int32)
        argmax_end = tf.argmax(self.prob_end, axis=-1, output_type=tf.int32)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.stack([argmax_start, argmax_end], axis=1), self.answer_spans_in), tf.float32))

        # predictions: coerce start<=end by keeping only the upper triangle
        # (diagonal included) of the start x end joint probability matrix.
        self.probs_coerced = tf.matrix_band_part(tf.matmul(tf.expand_dims(self.prob_start, 2), tf.expand_dims(self.prob_end, 1)), 0, -1)

        # Take the argmax over the flattened matrix, then unravel the flat
        # index back into (row=start, column=end).
        context_steps = tf.shape(self.context_in)[1]
        self.pred_ix = tf.argmax(tf.reshape(self.probs_coerced, [-1, context_steps * context_steps]), axis=1)
        self.pred_start = tf.cast(tf.floor(tf.cast(self.pred_ix, tf.float32) / tf.cast(context_steps, tf.float32)), tf.int32)
        self.pred_end = tf.cast(tf.mod(tf.cast(self.pred_ix, tf.int32), context_steps), tf.int32)
        self.pred_span = tf.concat([tf.expand_dims(self.pred_start, 1), tf.expand_dims(self.pred_end, 1)], axis=1)
コード例 #2
0
    def build_model(self):

        with tf.device('/cpu:*'):
            self.context_raw = tf.placeholder(tf.string, [None, None], name='placeholder_context_raw')  # source vectors of unknown size
            self.question_raw  = tf.placeholder(tf.string, [None, None])  # target vectors of unknown size
            self.answer_raw  = tf.placeholder(tf.string, [None, None], name='placeholder_ans_raw')  # target vectors of unknown size
        self.context_ids = tf.placeholder(tf.int32, [None, None], name='placeholder_context_ids')  # source vectors of unknown size
        self.context_copy_ids = tf.placeholder(tf.int32, [None, None], name='placeholder_context_cp_ids')  # source vectors of unknown size
        self.context_length  = tf.placeholder(tf.int32, [None], name='placeholder_context_len')     # size(source)
        self.context_vocab_size  = tf.placeholder(tf.int32, [None], name='placeholder_context_vocsize')     # size(source_vocab)
        self.question_ids = tf.placeholder(tf.int32, [None, None])  # target vectors of unknown size
        self.question_onehot = tf.placeholder(tf.float32, [None, None, None])  # target vectors of unknown size
        self.question_length  = tf.placeholder(tf.int32, [None])     # size(source)
        self.answer_ids  = tf.placeholder(tf.int32, [None, None], name='placeholder_ans_ids')  # target vectors of unknown size
        self.answer_length  = tf.placeholder(tf.int32, [None], name='placeholder_ans_len')
        self.answer_locs  = tf.placeholder(tf.int32, [None,None], name='placeholder_ans_len')
        self.original_ix  = tf.placeholder(tf.int32, [None]) # unused - gives the index of the input in the unshuffled dataset

        self.hide_answer_in_copy = tf.placeholder_with_default(False, (),"hide_answer_in_copy")


        self.context_in = (self.context_raw, self.context_ids, self.context_copy_ids, self.context_length, self.context_vocab_size)
        self.question_in = (self.question_raw, self.question_ids, self.question_onehot, self.question_length)
        self.answer_in = (self.answer_raw, self.answer_ids, self.answer_length, self.answer_locs)
        self.input_batch = (self.context_in, self.question_in, self.answer_in, self.original_ix)

        curr_batch_size = tf.shape(self.answer_ids)[0]

        with tf.variable_scope('input_pipeline'):
            # build teacher output - coerce to vocab and pad with SOS/EOS
            # also build output for loss - one hot over vocab+context

            self.question_teach = tf.concat([tf.tile(tf.constant(self.vocab[SOS], shape=[1, 1]), [curr_batch_size,1]), self.question_ids[:,:-1]], axis=1)
            # self.question_teach_oh = tf.concat([tf.one_hot(tf.tile(tf.constant(self.vocab[SOS], shape=[1, 1]), [curr_batch_size,1]), depth=len(self.vocab)+FLAGS.max_copy_size), self.question_onehot[:,:-1,:]], axis=1)


            # init embeddings
            with tf.device('/cpu:*'):
                glove_embeddings = loader.load_glove(FLAGS.data_path, d=FLAGS.embedding_size)
                embeddings_init = tf.constant(loader.get_embeddings(self.vocab, glove_embeddings, D=FLAGS.embedding_size))
                self.embeddings = tf.get_variable('word_embeddings', initializer=embeddings_init, dtype=tf.float32)
                if FLAGS.loc_embeddings:
                    self.copy_embeddings = tf.get_variable('copy_embeddings', shape=(FLAGS.max_copy_size, FLAGS.embedding_size), dtype=tf.float32)
                else:
                    self.copy_embeddings = tf.nn.embedding_lookup(self.embeddings, tf.tile([self.vocab[OOV]], [FLAGS.max_copy_size]))
                self.full_embeddings = tf.concat([self.embeddings, self.copy_embeddings], axis=0)
                assert self.embeddings.shape == [len(self.vocab), self.embedding_size]

                # this uses a load of memory, dont create unless it's actually needed
                if self.use_embedding_loss:
                    self.glove_vocab = loader.get_glove_vocab(FLAGS.data_path, size=-1, d=FLAGS.embedding_size, filter_to_squad=True)
                    extended_embeddings_init = tf.constant(loader.get_embeddings(self.glove_vocab, glove_embeddings, D=FLAGS.embedding_size))
                    self.extended_embeddings = tf.get_variable('full_word_embeddings', initializer=extended_embeddings_init, dtype=tf.float32, trainable=False)

                self.question_teach_ids = tf.concat([tf.tile(tf.constant(self.vocab[SOS], shape=[1, 1]), [curr_batch_size, 1]), self.question_ids[:, :-1]], axis=1)
                self.question_teach_embedded = tf.nn.embedding_lookup(self.full_embeddings, self.question_teach_ids)

                del glove_embeddings

            # First, coerce them to the shortlist vocab. Then embed
            self.context_coerced = tf.where(tf.greater_equal(self.context_ids, len(self.vocab)), tf.tile(tf.constant([[self.vocab[OOV]]]), tf.shape(self.context_ids)), self.context_ids)
            self.context_embedded = tf.nn.embedding_lookup(self.embeddings, self.context_coerced)

            self.answer_coerced = tf.where(tf.greater_equal(self.answer_ids, len(self.vocab)), tf.tile(tf.constant([[self.vocab[OOV]]]), tf.shape(self.answer_ids)), self.answer_ids)
            self.answer_embedded = tf.nn.embedding_lookup(self.embeddings, self.answer_coerced) # batch x seq x embed

            # Is context token in answer?
            max_context_len = tf.reduce_max(self.context_length)
            context_ix = tf.tile(tf.expand_dims(tf.range(max_context_len),axis=0), [curr_batch_size,1])
            gt_start = tf.greater_equal(context_ix, tf.tile(tf.expand_dims(self.answer_locs[:,0],axis=1), [1, max_context_len]))
            lt_end = tf.less(context_ix, tf.tile(tf.expand_dims(self.answer_locs[:,0]+self.answer_length,axis=1), [1, max_context_len]))
            self.in_answer_feature = tf.expand_dims(tf.cast(tf.logical_and(gt_start, lt_end), tf.float32),axis=2)

            embed_feats =[self.context_embedded, self.in_answer_feature]

            if FLAGS.begin_ans_feat:
                self.begin_ans_feat = tf.expand_dims(tf.one_hot(self.answer_locs[:,0], depth=max_context_len), axis=2)
                embed_feats.append(self.begin_ans_feat)

            # augment embedding
            self.context_embedded = tf.concat(embed_feats, axis=2)

        # Build encoder for context
        # Build RNN cell for encoder
        with tf.variable_scope('context_encoder'):
            context_encoder_cell_fwd = tf.contrib.rnn.DropoutWrapper(
                    cell=tf.nn.rnn_cell.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(num_units=self.context_encoder_units) for n in range(FLAGS.ctxt_encoder_depth)]),
                    input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    input_size=self.embedding_size+1+(1 if FLAGS.begin_ans_feat else 0),
                    variational_recurrent=True,
                    dtype=tf.float32)
            context_encoder_cell_bwd = tf.contrib.rnn.DropoutWrapper(
                    cell=tf.nn.rnn_cell.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(num_units=self.context_encoder_units) for n in range(FLAGS.ctxt_encoder_depth)]),
                    input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    input_size=self.embedding_size+1+(1 if FLAGS.begin_ans_feat else 0),
                    variational_recurrent=True,
                    dtype=tf.float32)

            # Unroll encoder RNN
            context_encoder_output_parts, context_encoder_state = tf.nn.bidirectional_dynamic_rnn(
                context_encoder_cell_fwd, context_encoder_cell_bwd, self.context_embedded,
                sequence_length=self.context_length, dtype=tf.float32)
            self.context_encoder_output = tf.concat([context_encoder_output_parts[0], context_encoder_output_parts[1]], axis=2) # batch x seq x 2*units


        # Build encoder for mean(encoder(context)) + answer
        # Build RNN cell for encoder
        with tf.variable_scope('a_encoder'):
            # To build the "extractive condition encoding" input, take embeddings of answer words concated with encoded context at that position

            # This is super involved! Even though we have the right indices we have to do a LOT of massaging to get them in the right shape
            seq_length = tf.reduce_max(self.answer_length)

            self.indices = self.answer_locs

            # cap the indices to be valid
            self.indices = tf.minimum(self.indices, tf.tile(tf.expand_dims(self.context_length-1,axis=1),[1,tf.reduce_max(self.answer_length)]))

            batch_ix = tf.expand_dims(tf.transpose(tf.tile(tf.expand_dims(tf.range(curr_batch_size),axis=0),[seq_length,1]),[1,0]),axis=2)
            full_ix = tf.concat([batch_ix,tf.expand_dims(self.indices,axis=-1)], axis=2)
            self.context_condition_encoding = tf.gather_nd(self.context_encoder_output, full_ix)


            self.full_condition_encoding = tf.concat([self.context_condition_encoding, self.answer_embedded], axis=2)

            a_encoder_cell_fwd = tf.contrib.rnn.DropoutWrapper(cell=tf.nn.rnn_cell.MultiRNNCell([
                    tf.contrib.rnn.BasicLSTMCell(num_units=self.answer_encoder_units) for n in range(FLAGS.ans_encoder_depth)]),
                    input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    input_size=self.context_encoder_units*2+self.embedding_size,
                    variational_recurrent=True,
                    dtype=tf.float32)
            a_encoder_cell_bwd = tf.contrib.rnn.DropoutWrapper(cell=tf.nn.rnn_cell.MultiRNNCell([
                    tf.contrib.rnn.BasicLSTMCell(num_units=self.answer_encoder_units) for n in range(FLAGS.ans_encoder_depth)]),
                    input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                    input_size=self.context_encoder_units*2+self.embedding_size,
                    variational_recurrent=True,
                    dtype=tf.float32)

            # Unroll encoder RNN
            a_encoder_output_parts, a_encoder_state_parts = tf.nn.bidirectional_dynamic_rnn(
                a_encoder_cell_fwd, a_encoder_cell_bwd, self.full_condition_encoding,
                sequence_length=self.answer_length, dtype=tf.float32)

            # This is actually wrong! It should take last element of the fwd RNN, and first element of the bwd RNN. It doesn't seem to matter in experiments, and fixing it would be a breaking change.
            # self.a_encoder_final_state = tf.concat([ops.get_last_from_seq(a_encoder_output_parts[0], self.answer_length-1), ops.get_last_from_seq(a_encoder_output_parts[1], self.answer_length-1)], axis=1)
            # Fixed!
            self.a_encoder_final_state = tf.concat([ops.get_last_from_seq(a_encoder_output_parts[0], self.answer_length-1), a_encoder_output_parts[1][:,0,:]], axis=1)

        # build init state
        with tf.variable_scope('decoder_initial_state'):
            L = tf.get_variable('decoder_L', [self.context_encoder_units*2, self.context_encoder_units*2], initializer=tf.glorot_uniform_initializer(), dtype=tf.float32)
            W0 = tf.get_variable('decoder_W0', [self.context_encoder_units*2, self.decoder_units], initializer=tf.glorot_uniform_initializer(), dtype=tf.float32)
            b0 = tf.get_variable('decoder_b0', [self.decoder_units], initializer=tf.zeros_initializer(), dtype=tf.float32)

            # This is a bit cheeky - this should be injected by the more advanced model. Consider refactoring into separate methods then overloading the one that handles this
            if self.advanced_condition_encoding:
                self.context_encoding = self.a_encoder_final_state # this would be the maluuba model
            else:
                self.context_encoding = tf.reduce_mean(self.context_condition_encoding, axis=1) # this is the baseline model

            r = tf.reduce_sum(self.context_encoder_output, axis=1)/tf.expand_dims(tf.cast(self.context_length,tf.float32),axis=1) + tf.matmul(self.context_encoding,L)
            self.s0 = tf.nn.tanh(tf.matmul(r,W0) + b0)

        if self.advanced_condition_encoding and FLAGS.full_context_encoding:
            # for Maluuba model, decoder inputs are concat of context and answer encoding
            # Strictly speaking this is still wrong - the attn mech uses only the context encoding
            self.context_encoder_output = tf.concat([self.context_encoder_output, tf.tile(tf.expand_dims(self.a_encoder_final_state,axis=1),[1,max_context_len,1])], axis=2)

        # decode
        with tf.variable_scope('decoder_init'):

            beam_memory = tf.contrib.seq2seq.tile_batch( self.context_encoder_output, multiplier=FLAGS.beam_width )
            beam_memory_sequence_length = tf.contrib.seq2seq.tile_batch( self.context_length, multiplier=FLAGS.beam_width)
            s0_tiled = tf.contrib.seq2seq.tile_batch( self.s0, multiplier=FLAGS.beam_width)
            beam_init_state = tf.contrib.rnn.LSTMStateTuple(s0_tiled, tf.contrib.seq2seq.tile_batch(tf.zeros([curr_batch_size, self.decoder_units]), multiplier=FLAGS.beam_width))

            train_memory = self.context_encoder_output
            train_memory_sequence_length = self.context_length
            train_init_state = tf.contrib.rnn.LSTMStateTuple(self.s0, tf.zeros([curr_batch_size, self.decoder_units]))

        with tf.variable_scope('attn_mech') as scope:
            train_attention_mechanism = copy_attention_wrapper.BahdanauAttention(
                            num_units=self.decoder_units, memory=train_memory,
                            memory_sequence_length=train_memory_sequence_length, name='bahdanau_attn')

            if FLAGS.separate_copy_mech:
                train_copy_mechanism = copy_attention_wrapper.BahdanauAttention(
                                num_units=self.decoder_units, memory=train_memory,
                                memory_sequence_length=train_memory_sequence_length, name='bahdanau_attn_copy')
            else:
                train_copy_mechanism = train_attention_mechanism

            with tf.variable_scope('decoder_cell'):
                train_decoder_cell = tf.contrib.rnn.DropoutWrapper(
                        cell=tf.contrib.rnn.BasicLSTMCell(num_units=self.decoder_units),
                        input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                        state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                        output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                        input_size=self.embedding_size+self.decoder_units//2,
                        variational_recurrent=True,
                        dtype=tf.float32)

            train_decoder_cell = copy_attention_wrapper.CopyAttentionWrapper(train_decoder_cell,
                                                                train_attention_mechanism,
                                                                attention_layer_size=self.decoder_units / 2,
                                                                alignment_history=False,
                                                                copy_mechanism=train_copy_mechanism,
                                                                output_attention=True,
                                                                initial_cell_state=train_init_state, name='copy_attention_wrapper')

            train_init_state = train_decoder_cell.zero_state(curr_batch_size*(1), tf.float32).clone(cell_state=train_init_state)

        # copy_mechanism = copy_attention_wrapper.BahdanauAttention(
        #                 num_units=self.decoder_units, memory=memory,
        #                 memory_sequence_length=memory_sequence_length)

        with tf.variable_scope('attn_mech', reuse=True) as scope:
            scope.reuse_variables()
            beam_attention_mechanism = copy_attention_wrapper.BahdanauAttention(
                            num_units=self.decoder_units, memory=beam_memory,
                            memory_sequence_length=beam_memory_sequence_length, name='bahdanau_attn')

            if FLAGS.separate_copy_mech:
                beam_copy_mechanism = copy_attention_wrapper.BahdanauAttention(
                                num_units=self.decoder_units, memory=beam_memory,
                                memory_sequence_length=beam_memory_sequence_length, name='bahdanau_attn_copy')
            else:
                beam_copy_mechanism = beam_attention_mechanism

            with tf.variable_scope('decoder_cell', reuse=True):
                beam_decoder_cell = tf.contrib.rnn.DropoutWrapper(
                        cell=tf.contrib.rnn.BasicLSTMCell(num_units=self.decoder_units),
                        input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                        state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                        output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
                        input_size=self.embedding_size+self.decoder_units//2,
                        variational_recurrent=True,
                        dtype=tf.float32)

            beam_decoder_cell = copy_attention_wrapper.CopyAttentionWrapper(beam_decoder_cell,
                                                                beam_attention_mechanism,
                                                                attention_layer_size=self.decoder_units / 2,
                                                                alignment_history=False,
                                                                copy_mechanism=beam_copy_mechanism,
                                                                output_attention=True,
                                                                initial_cell_state=beam_init_state, name='copy_attention_wrapper')

            beam_init_state = beam_decoder_cell.zero_state(curr_batch_size*(FLAGS.beam_width), tf.float32).clone(cell_state=beam_init_state)


        # We have to make two copies of the layer as beam search uses different shapes - but force them to share variables
        with tf.variable_scope('copy_layer') as scope:
            ans_mask = 1-tf.reshape(self.in_answer_feature,[curr_batch_size,-1])
            self.answer_mask = tf.cond(self.hide_answer_in_copy, lambda: ans_mask, lambda: tf.ones(tf.shape(ans_mask)))

            train_projection_layer = copy_layer.CopyLayer(FLAGS.decoder_units//2, FLAGS.max_context_len,
                                            switch_units=FLAGS.switch_units,
                                            source_provider=lambda: self.context_copy_ids if FLAGS.context_as_set else self.context_ids,
                                            source_provider_sl=lambda: self.context_ids,
                                            condition_encoding=lambda: self.context_encoding,
                                            vocab_size=len(self.vocab),
                                            training_mode=self.is_training,
                                            output_mask=lambda: self.answer_mask,
                                            context_as_set=FLAGS.context_as_set,
                                            max_copy_size=FLAGS.max_copy_size,
                                            mask_oovs=tf.logical_not(self.is_training),
                                            name="copy_layer")

            scope.reuse_variables()
            answer_mask_beam = tf.contrib.seq2seq.tile_batch(self.answer_mask, multiplier=FLAGS.beam_width)


            beam_projection_layer = copy_layer.CopyLayer(FLAGS.decoder_units//2, FLAGS.max_context_len,
                                            switch_units=FLAGS.switch_units,
                                            source_provider=lambda: self.context_copy_ids if FLAGS.context_as_set else self.context_ids,
                                            source_provider_sl=lambda: self.context_ids,
                                            condition_encoding=lambda: self.context_encoding,
                                            vocab_size=len(self.vocab),
                                            training_mode=self.is_training,
                                            output_mask=lambda: answer_mask_beam,
                                            context_as_set=FLAGS.context_as_set,
                                            max_copy_size=FLAGS.max_copy_size,
                                            mask_oovs=tf.logical_not(self.is_training),
                                            name="copy_layer")

        with tf.variable_scope('decoder_unroll') as scope:
            # Helper - training
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                self.question_teach_embedded, self.question_length)
                # self.question_teach_oh, self.question_length)
                # decoder_emb_inp, length(decoder_emb_inp)+1)

            # Decoder - training
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                train_decoder_cell, training_helper,
                initial_state=train_init_state,
                # initial_state=encoder_state
                # TODO: hardcoded FLAGS.max_copy_size is longest context in SQuAD - this will need changing for a new dataset!!!
                output_layer=train_projection_layer
                )

            # Unroll the decoder
            training_outputs, training_decoder_states,training_out_lens = tf.contrib.seq2seq.dynamic_decode(training_decoder,impute_finished=True, maximum_iterations=tf.reduce_max(self.question_length))

            training_probs=training_outputs.rnn_output

        with tf.variable_scope(scope, reuse=True):
            start_tokens = tf.tile(tf.constant([self.vocab[SOS]], dtype=tf.int32), [ curr_batch_size  ] )
            end_token = self.vocab[EOS]

            # DBS degrades to normal BS with groups=1, but my implementation is 1) probably slower and 2) wont receive updates from upstream
            if FLAGS.diverse_bs:
                beam_decoder = DiverseBeamSearchDecoder( cell = beam_decoder_cell,
                                                               embedding = self.full_embeddings,
                                                               start_tokens = start_tokens,
                                                               end_token = end_token,
                                                               initial_state = beam_init_state,
                                                               beam_width = FLAGS.beam_width,
                                                               output_layer = beam_projection_layer ,
                                                               length_penalty_weight=FLAGS.length_penalty,
                                                               num_groups=FLAGS.beam_groups,
                                                               diversity_param=FLAGS.beam_diversity)
            else:
               beam_decoder = tf.contrib.seq2seq.BeamSearchDecoder( cell = beam_decoder_cell,
                                                                  embedding = self.full_embeddings,
                                                                  start_tokens = start_tokens,
                                                                  end_token = end_token,
                                                                  initial_state = beam_init_state,
                                                                  beam_width = FLAGS.beam_width,
                                                                  output_layer = beam_projection_layer ,
                                                                  length_penalty_weight=FLAGS.length_penalty)

            beam_outputs, beam_decoder_states,beam_out_lens = tf.contrib.seq2seq.dynamic_decode(  beam_decoder,
                                                                    impute_finished=False,
                                                                   maximum_iterations=40 )

            beam_pred_ids = beam_outputs.predicted_ids[:,:,0]

            # tf1.4 (and maybe others) return -1 for parts of the sequence outside the valid length, replace this with PAD (0)
            beam_mask = tf.sequence_mask(beam_out_lens[:,0], tf.shape(beam_pred_ids)[1], dtype=tf.int32)
            beam_pred_ids = beam_pred_ids*beam_mask

            beam_pred_scores = beam_outputs.beam_search_decoder_output.scores

            # pred_ids = debug_shape(pred_ids, "pred ids")
            beam_probs = tf.one_hot(beam_pred_ids, depth=len(self.vocab)+FLAGS.max_copy_size)


        self.q_hat = training_probs#tf.nn.softmax(logits, dim=2)

        # because we've done a few logs of softmaxes, there can be some precision problems that lead to non zero probability outside of the valid vocab, fix it here:
        self.max_vocab_size = tf.tile(tf.expand_dims(self.context_vocab_size+len(self.vocab),axis=1),[1,tf.shape(self.question_onehot)[1]])
        output_mask = tf.sequence_mask(self.max_vocab_size, FLAGS.max_copy_size+len(self.vocab), dtype=tf.float32)
        # self.q_hat = self.q_hat*output_mask


        with tf.variable_scope('output'), tf.device('/cpu:*'):
            self.q_hat_ids = tf.argmax(self.q_hat,axis=2,output_type=tf.int32)
            self.a_string = ops.id_tensor_to_string(self.answer_coerced, self.rev_vocab, self.context_raw, context_as_set=FLAGS.context_as_set)
            self.q_hat_string = ops.id_tensor_to_string(self.q_hat_ids, self.rev_vocab, self.context_raw, context_as_set=FLAGS.context_as_set)

            self.q_hat_beam_ids = beam_pred_ids
            self.q_hat_beam_string = ops.id_tensor_to_string(self.q_hat_beam_ids, self.rev_vocab, self.context_raw, context_as_set=FLAGS.context_as_set)

            self.q_hat_full_beam_str = [ops.id_tensor_to_string(ids*tf.sequence_mask(beam_out_lens[:,i], tf.shape(beam_pred_ids)[1], dtype=tf.int32), self.rev_vocab, self.context_raw, context_as_set=FLAGS.context_as_set) for i,ids in enumerate(tf.unstack(beam_outputs.predicted_ids,axis=2))]
            self.q_hat_full_beam_lens = [len for len in tf.unstack(beam_out_lens,axis=1)]
            self.q_hat_beam_lens = beam_out_lens[:,0]

            self.q_gold = ops.id_tensor_to_string(self.question_ids, self.rev_vocab, self.context_raw, context_as_set=FLAGS.context_as_set)
            self._output_summaries.extend(
                [tf.summary.text("q_hat", self.q_hat_string),
                tf.summary.text("q_gold", self.q_gold),
                tf.summary.text("answer", self.answer_raw)])



        with tf.variable_scope('train_loss'):
            self.target_weights = tf.sequence_mask(
                        self.question_length, tf.shape(self.q_hat)[1], dtype=tf.float32)

            logits = ops.safe_log(self.q_hat)

            # if the switch variable is fully latent, this gets a bit fiddly - we have to sum probabilities over all correct tokens, *then* take CE loss
            # otherwise the built in fn is fine (and almost certainly faster)
            if FLAGS.latent_switch:
                self.crossent =-1*ops.safe_log(tf.reduce_sum(self.q_hat*self.question_onehot, axis=2))
            else:
                self.crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.question_ids, logits=logits)
            qlen_float = tf.cast(self.question_length, tf.float32)
            self.xe_loss = tf.reduce_mean(tf.reduce_sum(self.crossent * self.target_weights,axis=1)/qlen_float,axis=0)
            self.nll = tf.reduce_sum(self.crossent * self.target_weights,axis=1)

            # TODO: Check these should be included in baseline?
            # get sum of all probabilities for words that are also in answer
            answer_oh = tf.one_hot(self.answer_ids, depth=len(self.vocab) +FLAGS.max_copy_size)
            answer_mask = tf.tile(tf.reduce_sum(answer_oh, axis=1,keep_dims=True), [1,tf.reduce_max(self.question_length),1])
            self.suppression_loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_sum(answer_mask * self.q_hat,axis=2)*self.target_weights,axis=1)/qlen_float,axis=0)

            # entropy maximiser
            self.entropy_loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_sum(self.q_hat *ops.safe_log(self.q_hat),axis=2)*self.target_weights,axis=1)/qlen_float,axis=0)

            if self.use_embedding_loss:
                vocab_cap = tf.tile(tf.expand_dims(self.context_vocab_size+len(self.vocab)-1,axis=1),[1,FLAGS.max_copy_size+len(self.vocab)])
                with tf.device('/cpu:*'):
                    self.local_vocab_string = ops.id_tensor_to_string(tf.minimum(tf.tile(tf.expand_dims(tf.range(FLAGS.max_copy_size+len(self.vocab)),axis=0), [curr_batch_size,1]), vocab_cap), self.rev_vocab, self.context_raw, context_as_set=FLAGS.context_as_set)
                    self.local_vocab_to_extended = ops.string_tensor_to_id(self.local_vocab_string, self.glove_vocab)
                    self.local_embeddings = tf.reshape(tf.nn.embedding_lookup(self.extended_embeddings, self.local_vocab_to_extended), [curr_batch_size, FLAGS.max_copy_size+len(self.vocab),self.embedding_size])

                    self.q_gold_ids_extended = ops.string_tensor_to_id(self.question_raw, self.glove_vocab)

                # self.q_hat_extended = tf.matmul(self.q_hat, tf.stop_gradient(self.local_vocab_to_extended)) # batch x seq x ext_vocab

                self.q_gold_embedded_extended = tf.nn.embedding_lookup(self.extended_embeddings, self.q_gold_ids_extended)

                self.q_hat_embedded_extended = tf.matmul(self.q_hat,self.local_embeddings)
                # self.q_hat_embedded_extended = tf.matmul(self.extended_embeddings, tf.cast(self.q_hat_ids_extended, tf.int32), b_is_sparse=True)

                self.similarity = tf.reduce_sum(self.q_hat_embedded_extended * tf.stop_gradient(self.q_gold_embedded_extended), axis=-1)/(1e-5+tf.norm(self.q_gold_embedded_extended, axis=-1)*tf.norm(self.q_hat_embedded_extended, axis=-1)) # batch x seq
                self.dist = tf.reduce_sum(tf.square(self.q_hat_embedded_extended - tf.stop_gradient(self.q_gold_embedded_extended)), axis=-1)
                self._train_summaries.append(tf.summary.scalar("debug/similarities", tf.reduce_mean(tf.reduce_sum(self.similarity* self.target_weights,axis=1)/qlen_float)))
                self._train_summaries.append(tf.summary.scalar("debug/dist", tf.reduce_mean(tf.reduce_sum(self.dist* self.target_weights,axis=1)/qlen_float)))
                self.loss=tf.reduce_mean(tf.reduce_sum(tf.abs(tf.acos(self.similarity)) * self.target_weights,axis=1)/qlen_float,axis=0)
                # self.loss = tf.abs(tf.acos(self.similarity)
            else:

                self.loss = self.xe_loss + FLAGS.suppression_weight*self.suppression_loss + FLAGS.entropy_weight*self.entropy_loss

        self.shortlist_prob = tf.reduce_sum(self.q_hat[:,:,:len(self.vocab)],axis=2)*self.target_weights
        self.copy_prob = tf.reduce_sum(self.q_hat[:,:,len(self.vocab):],axis=2)*self.target_weights

        self.mean_copy_prob = tf.reduce_sum(self.copy_prob,axis=1)/qlen_float

        self._train_summaries.append(tf.summary.scalar("debug/shortlist_prob", tf.reduce_mean(tf.reduce_sum(self.shortlist_prob,axis=1)/qlen_float)))
        self._train_summaries.append(tf.summary.scalar("debug/copy_prob", tf.reduce_mean(tf.reduce_sum(self.copy_prob,axis=1)/qlen_float)))

        self._train_summaries.append(tf.summary.scalar('train_loss/xe_loss', self.xe_loss))
        self._train_summaries.append(tf.summary.scalar('train_loss/entropy_loss', self.entropy_loss))
        self._train_summaries.append(tf.summary.scalar('train_loss/suppr_loss', self.suppression_loss))





        #dont bother calculating gradients if not training
        if self.training_mode:
            # Calculate and clip gradients
            params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, params)
            clipped_gradients, _ = tf.clip_by_global_norm(
                gradients, 5)

            # Optimization
            if FLAGS.opt_type == "sgd":
                self.global_step = tf.train.create_global_step(self.graph)

                self.sgd_lr = 1 * tf.pow(0.5, tf.cast(tf.maximum(0, tf.cast(self.global_step, tf.int32)-8000)/1000, tf.float32))
                self._train_summaries.append(tf.summary.scalar('debug/sgd_lr', self.sgd_lr))
                self.optimizer = tf.train.GradientDescentOptimizer(self.sgd_lr).apply_gradients(
                    zip(clipped_gradients, params)) if self.training_mode else tf.no_op()
            elif FLAGS.opt_type == "sgd_mom":
                self.global_step = tf.train.create_global_step(self.graph)

                self.sgd_lr = 0.1 * tf.pow(0.5, tf.cast(tf.maximum(0, tf.cast(self.global_step, tf.int32)-16000)/2000, tf.float32))
                self._train_summaries.append(tf.summary.scalar('debug/sgd_lr', self.sgd_lr))
                momentum = tf.Variable(0.1, trainable=False)

                self.optimizer = tf.train.MomentumOptimizer(self.sgd_lr, momentum).apply_gradients(
                    zip(clipped_gradients, params)) if self.training_mode else tf.no_op()
            else:
                self.optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).apply_gradients(
                    zip(clipped_gradients, params)) if self.training_mode else tf.no_op()

        self.accuracy = tf.reduce_mean(tf.cast(tf.reduce_sum(self.question_onehot  * tf.contrib.seq2seq.hardmax(self.q_hat), axis=-1),tf.float32)*self.target_weights)
コード例 #3
0
def main(_):
    """Score pre-generated questions against the SQuAD gold questions.

    Predictions are looked up by SQuAD id through ``FileLoaderModel``; ids
    with no prediction are counted and skipped.  For every remaining sample
    the token F1, BLEU and sum-of-word-embeddings (SOWE) cosine similarity
    between prediction and gold question are computed.  When
    ``FLAGS.eval_metrics`` is set, QA, language-model and discriminator scores
    are added.  Aggregate metrics and per-sample results are written as JSON
    to ``FLAGS.log_dir`` and a summary is printed.

    Args:
        _: Unused positional argument.
    """
    model = FileLoaderModel('./models/BASELINE')
    squad = loader.load_squad_triples(FLAGS.data_path, True, as_dict=True)

    disc_path = FLAGS.model_dir + 'saved/discriminator-trained-latent'

    glove_embeddings = loader.load_glove(FLAGS.data_path)
    zero_embedding = np.zeros((FLAGS.embedding_size, ))

    def sum_of_word_embeddings(text):
        """Sum the GloVe vectors of the tokens in *text* (unknown words add zeros)."""
        vectors = [
            glove_embeddings[w] if w in glove_embeddings else zero_embedding
            for w in preprocessing.tokenise(text, asbytes=False)
        ]
        if not vectors:
            # No tokens at all: fall back to a well-formed zero vector.
            return zero_embedding
        return np.sum(np.asarray(vectors), axis=0)

    def cosine_similarity(vec_a, vec_b):
        """Cosine of the two vectors as a plain float; 0.0 if either has zero norm."""
        norm_a = np.linalg.norm(vec_a, ord=2)
        norm_b = np.linalg.norm(vec_b, ord=2)
        if norm_a == 0 or norm_b == 0:
            # Avoid 0/0 -> NaN, which would poison the mean over all samples.
            return 0.0
        return float(np.inner(vec_a, vec_b) / norm_a / norm_b)

    if FLAGS.eval_metrics:
        lm = LstmLmInstance()
        qa = QANetInstance()

        lm.load_from_chkpt(FLAGS.model_dir + 'saved/lmtest')
        qa.load_from_chkpt(FLAGS.model_dir + 'saved/qanet')

        discriminator = DiscriminatorInstance(trainable=False, path=disc_path)

    f1s = []
    bleus = []
    qa_scores = []
    lm_scores = []
    disc_scores = []
    sowe_similarities = []
    res = []

    missing = 0

    for sample_id, el in tqdm(squad.items()):
        pred_str = model.get_q(sample_id)
        if pred_str is None:
            missing += 1
            continue

        # el is indexed as (context, gold question, answer text, answer position).
        context = el[0]
        gold_str = el[1]
        a_text = el[2]
        a_pos = el[3]

        f1s.append(metrics.f1(gold_str, pred_str))
        bleus.append(metrics.bleu(gold_str, pred_str))

        # cosine similarity between sums of word embeddings
        sowe_similarities.append(
            cosine_similarity(sum_of_word_embeddings(pred_str),
                              sum_of_word_embeddings(gold_str)))

        this_metric_dict = {
            'f1': f1s[-1],
            'bleu': bleus[-1],
            'nll': 0,
            'sowe': sowe_similarities[-1]
        }

        if FLAGS.eval_metrics:
            qa_pred = qa.get_ans([context], [pred_str])
            qa_score = metrics.f1(a_text.lower(), qa_pred[0].lower())
            # The helpers are called with a batch of one; take the single
            # element so 'lm' is stored as a scalar, like 'disc'.
            lm_score = lm.get_seq_perplexity([pred_str]).tolist()[0]
            disc_score = discriminator.get_pred([context], [pred_str],
                                                [a_text],
                                                [a_pos]).tolist()[0]

            this_metric_dict = {
                **this_metric_dict, 'qa': qa_score,
                'lm': lm_score,
                'disc': disc_score
            }
            qa_scores.append(qa_score)
            lm_scores.append(lm_score)
            disc_scores.append(disc_score)

        res.append({
            'c': context,
            'q_pred': pred_str,
            'q_gold': gold_str,
            'a_pos': a_pos,
            'a_text': a_text,
            'metrics': this_metric_dict
        })

    metric_dict = {
        'f1': np.mean(f1s),
        'bleu': np.mean(bleus),
        'nll': 0,
        'sowe': np.mean(sowe_similarities)
    }
    if FLAGS.eval_metrics:
        metric_dict = {
            **metric_dict, 'qa': np.mean(qa_scores),
            'lm': np.mean(lm_scores),
            'disc': np.mean(disc_scores)
        }

    with open(FLAGS.log_dir + 'out_eval_BASELINE' +
              ("_train" if not FLAGS.eval_on_dev else "") + '.json',
              'w',
              encoding='utf-8') as fp:
        json.dump({"metrics": metric_dict, "results": res}, fp)

    print("F1: ", metric_dict['f1'])
    print("BLEU: ", metric_dict['bleu'])
    print("NLL: ", 0)
    print("SOWE: ", metric_dict['sowe'])
    if FLAGS.eval_metrics:
        print("QA: ", metric_dict['qa'])
        print("LM: ", metric_dict['lm'])
        print("Disc: ", metric_dict['disc'])

    print(missing, " ids were missing")
コード例 #4
0
ファイル: eval.py プロジェクト: Ar9av/Question-Generation
def main(_):
    """Evaluate a trained question-generation checkpoint on SQuAD.

    Restores the latest checkpoint under
    ``FLAGS.model_dir + 'qgen/' + FLAGS.model_type + '/' + FLAGS.eval_model_id``,
    decodes ``FLAGS.num_eval_samples // FLAGS.eval_batch_size`` batches with
    beam search and scores the top beam against the gold question (token F1,
    BLEU, NLL, sum-of-word-embeddings cosine).  When ``FLAGS.eval_metrics`` is
    set, QA, language-model and discriminator scores are added.  An HTML
    preview of the first batch plus a JSON file with aggregate metrics and
    per-sample results are written to ``FLAGS.log_directory``.

    Args:
        _: Unused positional argument.
    """
    model_type = FLAGS.model_type
    disc_path = FLAGS.model_dir+'saved/discriminator-trained-latent'
    chkpt_path = FLAGS.model_dir+'qgen/'+ model_type+'/'+FLAGS.eval_model_id

    # Check the checkpoint directory before anything reads from it (the vocab
    # file lives there too), so a wrong path gives this message rather than a
    # FileNotFoundError from open().
    if not os.path.exists(chkpt_path):
        exit('Checkpoint path doesnt exist! '+chkpt_path)

    # load dataset
    dev_data = loader.load_squad_triples(FLAGS.data_path, dev=FLAGS.eval_on_dev, test=FLAGS.eval_on_test)

    if len(dev_data) < FLAGS.num_eval_samples:
        exit('***ERROR*** Eval dataset is smaller than the num_eval_samples flag!')
    if len(dev_data) > FLAGS.num_eval_samples:
        print('***WARNING*** Eval dataset is larger than the num_eval_samples flag!')

    # Keep the unfiltered contexts / answer positions: the QA model and the
    # discriminator are fed these rather than the cropped contexts.
    dev_contexts_unfilt, _, _, dev_a_pos_unfilt = zip(*dev_data)

    if FLAGS.filter_window_size_before > -1:
        dev_data = preprocessing.filter_squad(dev_data, window_size_before=FLAGS.filter_window_size_before, window_size_after=FLAGS.filter_window_size_after, max_tokens=FLAGS.filter_max_tokens)

    print('Loaded SQuAD dev set with ',len(dev_data),' triples')

    with open(chkpt_path+'/vocab.json') as f:
        vocab = json.load(f)

    with SquadStreamer(vocab, FLAGS.eval_batch_size, 1, shuffle=False) as dev_data_source:

        glove_embeddings = loader.load_glove(FLAGS.data_path)
        zero_embedding = np.zeros((FLAGS.embedding_size,))

        def sum_of_word_embeddings(text):
            """Sum the GloVe vectors of the tokens in *text* (unknown words add zeros)."""
            vectors = [glove_embeddings[w] if w in glove_embeddings else zero_embedding
                       for w in preprocessing.tokenise(text, asbytes=False)]
            if not vectors:
                # No tokens at all: fall back to a well-formed zero vector.
                return zero_embedding
            return np.sum(np.asarray(vectors), axis=0)

        def cosine_similarity(vec_a, vec_b):
            """Cosine of the two vectors as a plain float; 0.0 if either has zero norm."""
            norm_a = np.linalg.norm(vec_a, ord=2)
            norm_b = np.linalg.norm(vec_b, ord=2)
            if norm_a == 0 or norm_b == 0:
                # Avoid 0/0 -> NaN, which would poison the mean over all samples.
                return 0.0
            return float(np.inner(vec_a, vec_b)/norm_a/norm_b)

        # Create model
        if model_type[:7] == "SEQ2SEQ":
            model = Seq2SeqModel(vocab, training_mode=False)
        elif model_type[:2] == "RL":
            # TEMP - no need to spin up the LM or QA model at eval time
            FLAGS.qa_weight = 0
            FLAGS.lm_weight = 0
            model = RLModel(vocab, training_mode=False)
        else:
            exit("Unrecognised model type: "+model_type)

        with model.graph.as_default():
            saver = tf.train.Saver()

        if FLAGS.eval_metrics:
            lm = LstmLmInstance()
            qa = QANetInstance()

            lm.load_from_chkpt(FLAGS.model_dir+'saved/lmtest')
            qa.load_from_chkpt(FLAGS.model_dir+'saved/qanet2')

            discriminator = DiscriminatorInstance(trainable=False, path=disc_path)

        # Tensors fetched for every batch; the order must match the unpacking below.
        fetches = [model.q_hat_beam_string, model.q_hat_full_beam_str, model.q_hat_full_beam_lens,
                   model.q_hat_beam_ids, model.q_hat_beam_lens, model.question_raw,
                   model.question_length, model.question_ids, model.context_raw,
                   model.context_length, model.answer_locs, model.answer_length,
                   model.nll, model.mean_copy_prob]

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit)
        with tf.Session(graph=model.graph, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; report that instead of passing None to restore().
            latest_chkpt = tf.train.latest_checkpoint(chkpt_path)
            if latest_chkpt is None:
                exit('No checkpoint found in '+chkpt_path)
            saver.restore(sess, latest_chkpt)

            num_steps = FLAGS.num_eval_samples//FLAGS.eval_batch_size

            # Initialise the dataset
            dev_data_source.initialise(dev_data)

            f1s = []
            bleus = []
            qa_scores = []
            lm_scores = []
            nlls = []
            disc_scores = []
            sowe_similarities = []
            copy_probs = []

            qgolds = []
            qpreds = []

            res = []
            for i in tqdm(range(num_steps), desc='Epoch 0'):
                dev_batch, curr_batch_size = dev_data_source.get_batch()
                (pred_batch, _pred_beam, _pred_beam_lens, pred_ids, pred_lens,
                 gold_batch, gold_lens, _gold_ids, ctxt, ctxt_len, ans, ans_len,
                 nll, copy_prob) = sess.run(fetches, feed_dict={model.input_batch: dev_batch, model.is_training: False})

                unfilt_ctxt_batch = [dev_contexts_unfilt[ix] for ix in dev_batch[3]]
                unfilt_apos_batch = [dev_a_pos_unfilt[ix] for ix in dev_batch[3]]
                a_text_batch = ops.byte_token_array_to_str(dev_batch[2][0], dev_batch[2][2], is_array=False)
                gold_ans = a_text_batch

                # subtract 1 to remove the "end sent token"
                pred_q_batch = [q.replace(' </Sent>',"").replace(" <PAD>","") for q in ops.byte_token_array_to_str(pred_batch, pred_lens-1)]

                nll_batch = nll.tolist()
                pred_ids_batch = pred_ids.tolist()
                nlls.extend(nll_batch)
                copy_probs.extend(copy_prob.tolist())

                if FLAGS.eval_metrics:
                    qa_pred = qa.get_ans(unfilt_ctxt_batch, ops.byte_token_array_to_str(pred_batch, pred_lens))

                    qa_score_batch = [metrics.f1(metrics.normalize_answer(gold_ans[b]), metrics.normalize_answer(qa_pred[b])) for b in range(curr_batch_size)]
                    lm_score_batch = lm.get_seq_perplexity(pred_q_batch).tolist()
                    disc_score_batch = discriminator.get_pred(unfilt_ctxt_batch, pred_q_batch, gold_ans, unfilt_apos_batch).tolist()

                    # Accumulate once per batch, so each sample contributes
                    # exactly one entry to the aggregate means.
                    qa_scores.extend(qa_score_batch)
                    lm_scores.extend(lm_score_batch)
                    disc_scores.extend(disc_score_batch)

                for b, _ in enumerate(pred_batch):
                    pred_str = pred_q_batch[b]
                    gold_str = tokens_to_string(gold_batch[b][:gold_lens[b]-1])
                    f1s.append(metrics.f1(gold_str, pred_str))
                    bleus.append(metrics.bleu(gold_str, pred_str))
                    qgolds.append(gold_str)
                    qpreds.append(pred_str)

                    # cosine similarity between sums of word embeddings
                    sowe_similarities.append(
                        cosine_similarity(sum_of_word_embeddings(pred_str),
                                          sum_of_word_embeddings(gold_str)))

                    this_metric_dict = {
                        'f1': f1s[-1],
                        'bleu': bleus[-1],
                        # this sample's own NLL, not the last one of the batch
                        'nll': nll_batch[b],
                        'sowe': sowe_similarities[-1]
                        }
                    if FLAGS.eval_metrics:
                        this_metric_dict = {
                            **this_metric_dict,
                            'qa': qa_score_batch[b],
                            'lm': lm_score_batch[b],
                            'disc': disc_score_batch[b]}

                    res.append({
                        'c': unfilt_ctxt_batch[b],
                        'q_pred': pred_str,
                        'q_gold': gold_str,
                        'a_pos': unfilt_apos_batch[b],
                        'a_text': a_text_batch[b],
                        'metrics': this_metric_dict,

                        'q_pred_ids': pred_ids_batch[b],
                        'q_gold_ids': dev_batch[1][1][b].tolist()
                    })

                # Quick output: print the first prediction/gold pair and dump
                # an HTML preview of the first batch.
                if i == 0:
                    gold_str = tokens_to_string(gold_batch[0][:gold_lens[0]-1])
                    print(qpreds[0])
                    print(gold_str)

                    title = chkpt_path
                    out_str = output_eval(title, pred_batch, pred_ids, pred_lens, gold_batch, gold_lens, ctxt, ctxt_len, ans, ans_len)
                    with open(FLAGS.log_directory+'out_eval_'+model_type+'.htm', 'w', encoding='utf-8') as fp:
                        fp.write(out_str)

            metric_dict = {
                'f1': np.mean(f1s),
                'bleu': metrics.bleu_corpus(qgolds, qpreds),
                'nll': np.mean(nlls),
                'sowe': np.mean(sowe_similarities)
                }
            if FLAGS.eval_metrics:
                metric_dict = {**metric_dict,
                    'qa': np.mean(qa_scores),
                    'lm': np.mean(lm_scores),
                    'disc': np.mean(disc_scores)}

            with open(FLAGS.log_directory+'out_eval_'+model_type+("_test" if FLAGS.eval_on_test else "")+("_train" if (not FLAGS.eval_on_dev and not FLAGS.eval_on_test) else "")+'.json', 'w', encoding='utf-8') as fp:
                json.dump({"metrics": metric_dict, "results": res}, fp)

            print("F1: ", metric_dict['f1'])
            print("BLEU: ", metric_dict['bleu'])
            print("NLL: ", metric_dict['nll'])
            print("SOWE: ", metric_dict['sowe'])

            print("Copy prob: ", np.mean(copy_probs))
            if FLAGS.eval_metrics:
                print("QA: ", metric_dict['qa'])
                print("LM: ", metric_dict['lm'])
                print("Disc: ", metric_dict['disc'])
コード例 #5
0
ファイル: lm.py プロジェクト: trunghlt/question-generation
    def build_model(self):

        self.dropout_prob = FLAGS.lm_dropout

        with tf.device('/cpu:*'):
            # Load glove embeddings
            glove_embeddings = loader.load_glove(FLAGS.data_path,
                                                 d=FLAGS.embedding_size)
            embeddings_init = tf.constant(
                loader.get_embeddings(self.vocab,
                                      glove_embeddings,
                                      D=FLAGS.embedding_size))
            self.embeddings = tf.get_variable('word_embeddings',
                                              initializer=embeddings_init,
                                              dtype=tf.float32)
            # self.embeddings = tf.get_variable('word_embeddings', (len(self.vocab), FLAGS.embedding_size), dtype=tf.float32)
            assert self.embeddings.shape == [
                len(self.vocab), self.embedding_size
            ]

        # input placeholder
        self.input_seqs = tf.placeholder(tf.int32, [None, None])

        self.input_lengths = tf.reduce_sum(tf.cast(
            tf.not_equal(self.input_seqs, 0), tf.int32),
                                           axis=1)

        self.input_embedded = tf.nn.embedding_lookup(self.embeddings,
                                                     self.input_seqs)

        self.tgt_input = self.input_embedded[:, :
                                             -1, :]  # start:end-1 - embedded
        self.tgt_output = self.input_seqs[:, 1:]  # start+1:end - ids

        # RNN
        # cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(num_units=self.num_units),
        #     input_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
        #     state_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
        #     output_keep_prob=(tf.cond(self.is_training,lambda: 1.0 - self.dropout_prob,lambda: 1.)),
        #     input_size=self.embedding_size,
        #     variational_recurrent=True,
        #     dtype=tf.float32)
        cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
            num_units=self.num_units,
            dropout_keep_prob=tf.cond(self.is_training,
                                      lambda: 1.0 - self.dropout_prob,
                                      lambda: 1.))

        outputs, states = tf.nn.dynamic_rnn(cell,
                                            self.tgt_input,
                                            dtype=tf.float32)

        self.logits = tf.layers.dense(outputs, len(self.vocab))
        self.probs = tf.nn.softmax(self.logits)
        self.preds = tf.argmax(self.probs, axis=2, output_type=tf.int32)

        # loss fn + opt
        self.target_weights = tf.sequence_mask(self.input_lengths - 1,
                                               tf.shape(self.input_seqs)[1] -
                                               1,
                                               dtype=tf.float32)
        self.loss = tf.reduce_mean(
            tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.tgt_output) *
                          self.target_weights,
                          axis=1) /
            tf.cast(tf.reduce_sum(self.target_weights, axis=1), tf.float32),
            axis=0)

        if self.training_mode:
            self.optimise = tf.train.AdamOptimizer(1e-4).minimize(self.loss)

        # seq evaluation
        self.log_probs = tf.reduce_sum(
            tf.one_hot(self.tgt_output, depth=len(self.vocab)) * self.probs,
            axis=2)
        self.seq_log_prob = tf.reduce_sum(
            ops.log2(self.log_probs) * self.target_weights, axis=1) / (tf.cast(
                tf.reduce_sum(self.target_weights, axis=1), tf.float32) + 1e-6)

        # metrics
        self.perplexity = tf.minimum(10000.0,
                                     tf.pow(2.0, -1.0 * self.seq_log_prob))
        self._train_summaries.append(
            tf.summary.scalar("train_perf/perplexity",
                              tf.reduce_mean(self.perplexity)))
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.preds, self.tgt_output), tf.float32))