    def sent_level_attention(self):
        with tf.variable_scope('sent-level') as scope:
            # reshape sentence vectors to (num docs, max sentences per doc, 2 * cell_dim)
            sent_inputs = tf.reshape(self.word_outputs, [-1, self.max_sent_length, 2 * self.cell_dim])

            # sentence encoder
            cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
            cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

            # learnable initial states (zeros), tiled so every document in the
            # batch gets its own copy
            init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                    shape=[1, self.cell_dim],
                                                    initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(sent_inputs)[0], 1])
            init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                    shape=[1, self.cell_dim],
                                                    initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(sent_inputs)[0], 1])

            rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                               cell_bw=cell_bw,
                                               inputs=sent_inputs,
                                               input_lengths=self.sent_lengths,
                                               initial_state_fw=init_state_fw,
                                               initial_state_bw=init_state_bw,
                                               scope=scope)

            # attention pools the sentence states into one document vector per doc
            sent_outputs, sent_att_weights = attention(inputs=rnn_outputs,
                                                       att_dim=self.att_dim,
                                                       sequence_lengths=self.sent_lengths)

            # apply dropout, only active during training
            self.sent_outputs = tf.layers.dropout(sent_outputs, self.dropout_rate, training=self.is_training)

    def _init_word_encoder(self):
        '''
        Build the Word Encoder part as in the paper.
        :return:
        '''
        with tf.variable_scope('word-encoder') as scope:

            # collapse the (num docs, num sentences) axes: shape becomes
            # (num sentences, num words, emb_size), so each sentence is treated
            # independently of its document and of its position within it
            word_inputs = tf.reshape(self.embedded_inputs,
                                     [-1, self.max_word_length, self.emb_size])

            # flatten to a 1-D tensor containing the length of each sentence
            word_lengths = tf.reshape(self.word_lengths, [-1])

            # define forward and backward GRU cells
            cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
            cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

            # initialize state of forward GRU cell as 0's, for each sentence in batch
            init_state_fw = tf.tile(tf.get_variable(
                'init_state_fw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(word_inputs)[0], 1])
            # same but for backward GRU cell
            init_state_bw = tf.tile(tf.get_variable(
                'init_state_bw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(word_inputs)[0], 1])

            # bidirectional_rnn returns (outputs, state); we keep the per-timestep
            # outputs rather than the final hidden state because the attention
            # layer below needs every word's hidden state, not just the last one
            rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                               cell_bw=cell_bw,
                                               inputs=word_inputs,
                                               input_lengths=word_lengths,
                                               initial_state_fw=init_state_fw,
                                               initial_state_bw=init_state_bw,
                                               scope=scope)
            # rnn_outputs.shape = [number sentences, number words, 2*self.cell_dim]

            # attention pools the word states into one sentence vector per sentence,
            # as in the paper: word_outputs has shape (num sentences, 2 * cell_dim),
            # word_att_weights holds the attention weights (the alphas)
            word_outputs, word_att_weights = attention(
                inputs=rnn_outputs,
                att_dim=self.att_dim,
                sequence_lengths=word_lengths)

            # apply dropout, only activate during training
            self.word_outputs = tf.layers.dropout(word_outputs,
                                                  self.dropout_rate,
                                                  training=self.is_training)

    def _init_sent_encoder(self):
        '''
        Build the Sentence Encoder part as in the paper.
        :return:
        '''
        with tf.variable_scope('sent-encoder') as scope:

            # input shape: (num docs, max sentences per document, 2 * cell_dim)
            sent_inputs = tf.reshape(
                self.word_outputs,
                [-1, self.max_sent_length, 2 * self.cell_dim])

            # sentence encoder
            cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
            cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

            # learnable initial states (zeros), one copy per document in the batch
            init_state_fw = tf.tile(tf.get_variable(
                'init_state_fw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(sent_inputs)[0], 1])
            init_state_bw = tf.tile(tf.get_variable(
                'init_state_bw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(sent_inputs)[0], 1])

            rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                               cell_bw=cell_bw,
                                               inputs=sent_inputs,
                                               input_lengths=self.sent_lengths,
                                               initial_state_fw=init_state_fw,
                                               initial_state_bw=init_state_bw,
                                               scope=scope)
            # rnn_outputs.shape = [num docs, number sentences, 2*self.cell_dim]

            # attention pools the sentence states into document vectors, as in
            # the paper: sent_outputs has shape (num docs, 2 * cell_dim)
            sent_outputs, sent_att_weights = attention(
                inputs=rnn_outputs,
                att_dim=self.att_dim,
                sequence_lengths=self.sent_lengths)

            # apply dropout, only active during training
            self.sent_outputs = tf.layers.dropout(sent_outputs,
                                                  self.dropout_rate,
                                                  training=self.is_training)
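
The attention helper these methods call is defined elsewhere in the source project. Below is a minimal sketch of the standard HAN-style attention it appears to implement (a learned projection, a context vector u_w, a masked softmax, and a weighted sum of hidden states); the names and the masking scheme are assumptions, not the project's exact code.

def attention(inputs, att_dim, sequence_lengths, scope='attention'):
    # inputs: [batch, time, hidden_dim] per-timestep bi-RNN outputs
    with tf.variable_scope(scope):
        hidden_dim = inputs.shape[-1].value
        W = tf.get_variable('W', [hidden_dim, att_dim])
        b = tf.get_variable('b', [att_dim], initializer=tf.zeros_initializer())
        u_w = tf.get_variable('u_w', [att_dim, 1])

        # u_t = tanh(W h_t + b): [batch, time, att_dim]
        u = tf.tanh(tf.tensordot(inputs, W, axes=1) + b)
        # scores u_t . u_w: [batch, time]
        scores = tf.squeeze(tf.tensordot(u, u_w, axes=1), axis=-1)

        # mask padded timesteps so they get ~zero attention weight
        mask = tf.sequence_mask(sequence_lengths, maxlen=tf.shape(inputs)[1])
        scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e30))
        alphas = tf.nn.softmax(scores)

        # weighted sum of hidden states: [batch, hidden_dim]
        outputs = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), axis=1)
        return outputs, alphas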
Example #4
    def _init_inter_review_encoder(self):  # reviews encoding
        with tf.variable_scope('inter-review-encoder') as scope:
            review_inputs = tf.reshape(
                self.sent_outputs,
                [-1, self.max_review_length, 4 * self.emb_size])
            # build a boolean mask over words, then reduce it to sentence-level
            # and review-level masks
            sent_inputs_mask_temp = tf.cast(self.docs, tf.bool)
            sent_inputs_mask = tf.reduce_any(sent_inputs_mask_temp, axis=3)
            review_inputs_mask = tf.reduce_any(sent_inputs_mask, axis=2)

            # reviews GRU encoder
            cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
            cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

            init_state_fw = tf.tile(tf.get_variable(
                'init_state_fw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(review_inputs)[0], 1])
            init_state_bw = tf.tile(tf.get_variable(
                'init_state_bw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(review_inputs)[0], 1])

            rnn_outputs, _ = bidirectional_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=review_inputs,
                input_lengths=self.review_lengths,
                initial_state_fw=init_state_fw,
                initial_state_bw=init_state_bw,
                scope=scope)

            reviews_encoding = disan(rnn_outputs, review_inputs_mask, 'DiSAN',
                                     self.dropout_rate, self.is_training, 0.,
                                     'elu', None, 'reviews-encoding')
            self.review_outputs = reviews_encoding
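
Every example calls get_shape(...) to recover the batch dimension for tf.tile, even when it is unknown at graph-construction time. The helper is not shown; a common implementation, assumed here, mixes static and dynamic shape information:

def get_shape(tensor):
    # return static dimensions where known and dynamic tf.shape() entries
    # otherwise, so get_shape(x)[0] works when the batch dimension is None
    static = tensor.shape.as_list()
    dynamic = tf.shape(tensor)
    return [dim if dim is not None else dynamic[i]
            for i, dim in enumerate(static)]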
Example #5
    def _init_sent_encoder(self):
        with tf.variable_scope('sentence') as scope:
            sentence_rnn_inputs = tf.reshape(
                self.word_outputs,
                [-1, self.max_num_sents, 2 * self.hidden_dim])

            # sentence encoder
            cell_fw = rnn.GRUCell(self.hidden_dim)
            cell_bw = rnn.GRUCell(self.hidden_dim)

            init_state_fw = tf.tile(
                tf.get_variable('init_state_fw',
                                shape=[1, self.hidden_dim],
                                initializer=tf.constant_initializer(1.0)),
                multiples=[get_shape(sentence_rnn_inputs)[0], 1])
            init_state_bw = tf.tile(
                tf.get_variable('init_state_bw',
                                shape=[1, self.hidden_dim],
                                initializer=tf.constant_initializer(1.0)),
                multiples=[get_shape(sentence_rnn_inputs)[0], 1])

            sentence_rnn_outputs, _ = bidirectional_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=sentence_rnn_inputs,
                input_lengths=self.document_lengths,
                initial_state_fw=init_state_fw,
                initial_state_bw=init_state_bw,
                scope=scope)

            self.sentence_outputs, self.sent_att_weights, self.img_att_weights = visual_aspect_attention(
                text_input=sentence_rnn_outputs,
                visual_input=self.images,
                att_dim=self.att_dim,
                sequence_lengths=self.document_lengths)

            self.sentence_outputs = tf.nn.dropout(
                self.sentence_outputs, keep_prob=self.dropout_keep_prob)
Example #6
    def _init_word_encoder(self):
        with tf.variable_scope('word') as scope:
            word_rnn_inputs = tf.reshape(
                self.embedded_inputs, [-1, self.max_num_words, self.emb_size])
            sentence_lengths = tf.reshape(self.sentence_lengths, [-1])

            # word encoder
            cell_fw = rnn.GRUCell(self.hidden_dim)
            cell_bw = rnn.GRUCell(self.hidden_dim)

            init_state_fw = tf.tile(
                tf.get_variable('init_state_fw',
                                shape=[1, self.hidden_dim],
                                initializer=tf.constant_initializer(1.0)),
                multiples=[get_shape(word_rnn_inputs)[0], 1])
            init_state_bw = tf.tile(
                tf.get_variable('init_state_bw',
                                shape=[1, self.hidden_dim],
                                initializer=tf.constant_initializer(1.0)),
                multiples=[get_shape(word_rnn_inputs)[0], 1])

            word_rnn_outputs, _ = bidirectional_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=word_rnn_inputs,
                input_lengths=sentence_lengths,
                initial_state_fw=init_state_fw,
                initial_state_bw=init_state_bw,
                scope=scope)

            self.word_outputs, self.word_att_weights = text_attention(
                inputs=word_rnn_outputs,
                att_dim=self.att_dim,
                sequence_lengths=sentence_lengths)

            self.word_outputs = tf.nn.dropout(self.word_outputs,
                                              keep_prob=self.dropout_keep_prob)
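
Note that Examples #5 and #6 regularize with tf.nn.dropout, which takes the keep probability, while the other examples use tf.layers.dropout, which takes the drop rate plus a training flag; the two conventions are related by keep_prob = 1 - rate. A minimal illustration:

x = tf.ones([4, 8])
# tf.layers.dropout: rate is the fraction of units dropped; it is a no-op
# unless training=True
y_rate = tf.layers.dropout(x, rate=0.5, training=True)
# tf.nn.dropout: keep_prob is the fraction of units kept (1 - rate); the
# caller must disable it manually at inference time
y_keep = tf.nn.dropout(x, keep_prob=0.5)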
Example #7
    def _init_word_encoder(self):
        with tf.variable_scope('word-encoder') as scope:
            word_inputs = tf.reshape(self.embedded_inputs,
                                     [-1, self.max_word_length, self.emb_size])
            word_lengths = tf.reshape(self.word_lengths, [-1])

            # word encoder
            cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
            cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

            init_state_fw = tf.tile(tf.get_variable(
                'init_state_fw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(word_inputs)[0], 1])
            init_state_bw = tf.tile(tf.get_variable(
                'init_state_bw',
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0)),
                                    multiples=[get_shape(word_inputs)[0], 1])

            rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                               cell_bw=cell_bw,
                                               inputs=word_inputs,
                                               input_lengths=word_lengths,
                                               initial_state_fw=init_state_fw,
                                               initial_state_bw=init_state_bw,
                                               scope=scope)

            word_outputs, word_att_weights = attention(
                inputs=rnn_outputs,
                att_dim=self.att_dim,
                sequence_lengths=word_lengths)
            self.word_outputs = tf.layers.dropout(word_outputs,
                                                  self.dropout_rate,
                                                  training=self.is_training)
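
The bidirectional_rnn wrapper used in Examples #4 through #7 is likewise defined elsewhere. A minimal sketch, assuming it wraps tf.nn.bidirectional_dynamic_rnn and concatenates the forward and backward outputs, which would account for the 2 * cell_dim feature sizes above (Example #8 below uses a different wrapper with a different signature):

def bidirectional_rnn(cell_fw, cell_bw, inputs, input_lengths,
                      initial_state_fw=None, initial_state_bw=None, scope=None):
    # run the forward and backward cells over the (padded) sequences
    (fw_out, bw_out), (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=input_lengths,
        initial_state_fw=initial_state_fw,
        initial_state_bw=initial_state_bw,
        dtype=tf.float32,
        scope=scope)
    # per-timestep outputs concatenated on the feature axis: [batch, time, 2 * cell_dim]
    outputs = tf.concat([fw_out, bw_out], axis=-1)
    # final GRU states concatenated the same way: [batch, 2 * cell_dim]
    state = tf.concat([fw_state, bw_state], axis=-1)
    return outputs, state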
Example #8
    def __init__(self,
                 batch_size=None,
                 learning_rate=None,
                 load_glove=True,
                 is_training=True):

        # if batch size / learning rate are not specified, default to the values in hyperparams.py
        self.batch_size = batch_size or Hp.batch_size
        self.learning_rate = learning_rate or Hp.learning_rate

        # TODO: implement handling of character embedding
        # load pre-trained GloVe dictionary; create embedding matrix
        if 'word_matrix.npy' not in os.listdir('data'):
            word_glove = glove_dict(Hp.glove_word) if load_glove else {}
            word_matrix = embedding_matrix(word_glove, 'word')
        else:
            word_matrix = np.load('./data/word_matrix.npy')

        # input placeholders (integer encoded sentences) & labels
        with tf.variable_scope('inputs'):
            self.p_word_inputs = tf.placeholder(
                tf.int32, [self.batch_size, Hp.max_p_words], 'p_words')
            self.q_word_inputs = tf.placeholder(
                tf.int32, [self.batch_size, Hp.max_q_words], 'q_words')
            self.labels = tf.placeholder(tf.int32, [self.batch_size, 2],
                                         'labels')

        # input length placeholders (actual non-padded length of each sequence in batch; dictates length of unrolling)
        with tf.variable_scope('seq_lengths'):
            self.p_word_lengths = tf.placeholder(tf.int32, [self.batch_size],
                                                 'p_words')
            self.q_word_lengths = tf.placeholder(tf.int32, [self.batch_size],
                                                 'q_words')

        # create tensor for word embedding matrix, lookup GloVe embeddings of inputs
        with tf.variable_scope('initial_embeddings'):
            self.word_matrix = tf.Variable(tf.constant(word_matrix,
                                                       dtype=tf.float32),
                                           trainable=False,
                                           name='word_matrix')
            self.p_word_embeds = tf.nn.embedding_lookup(self.word_matrix,
                                                        self.p_word_inputs,
                                                        name='p_word_embeds')
            self.q_word_embeds = tf.nn.embedding_lookup(self.word_matrix,
                                                        self.q_word_inputs,
                                                        name='q_word_embeds')

        # encode both paragraph & question using bi-directional RNN
        with tf.variable_scope('p_encodings'):
            self.p_encodings, states = bidirectional_rnn(
                self.p_word_embeds, self.p_word_lengths, Hp.rnn1_cell,
                Hp.rnn1_layers, Hp.rnn1_units, Hp.rnn1_dropout, is_training)
        with tf.variable_scope('q_encodings'):
            self.q_encodings, _ = bidirectional_rnn(
                self.q_word_embeds, self.q_word_lengths, Hp.rnn1_cell,
                Hp.rnn1_layers, Hp.rnn1_units, Hp.rnn1_dropout, is_training)

        # proofread the question by attending over itself (self-attention)
        with tf.variable_scope('q_proofread'):
            self.q_pr_out, _, self.q_pr_attn = attention_alignment(
                self.q_encodings, self.q_word_lengths, self.q_encodings,
                self.q_word_lengths, Hp.attn_layers, Hp.attn_units,
                Hp.attn_dropout, Hp.attn_cell, Hp.attn_mech, is_training)
        # create question-aware paragraph encoding using bi-directional RNN with attention
        with tf.variable_scope('q_aware_encoding'):
            self.pq_encoding, _, self.p2q_attn = attention_alignment(
                self.p_encodings, self.p_word_lengths, self.q_pr_out,
                self.q_word_lengths, Hp.attn_layers, Hp.attn_units,
                Hp.attn_dropout, Hp.attn_cell, Hp.attn_mech, is_training)

        # create paragraph encoding with self-matching attention
        # TODO: if decoder is uni-directional, which hidden state from BiRNN should be fed to initial state?
        with tf.variable_scope('self_matching'):
            self.pp_encoding, _, self.p2p_attn = attention_alignment(
                self.pq_encoding, self.p_word_lengths, self.pq_encoding,
                self.p_word_lengths, Hp.attn_layers, Hp.attn_units,
                Hp.attn_dropout, Hp.attn_cell, Hp.attn_mech, is_training)

        # find pointers (in paragraph) to beginning and end of answer to question
        with tf.variable_scope('pointer_net'):
            self.pointer_prob = pointer_net(self.pp_encoding,
                                            self.p_word_lengths, 2,
                                            self.word_matrix, Hp.ptr_cell,
                                            Hp.ptr_layers, Hp.ptr_units,
                                            Hp.ptr_dropout, is_training)
            self.pointers = tf.unstack(
                tf.argmax(self.pointer_prob, axis=2, output_type=tf.int32))

        # compute loss function
        with tf.variable_scope('loss'):
            loss = tf.zeros(())
            pointers = tf.unstack(self.pointer_prob)
            labels = tf.unstack(self.labels, axis=1)
            equal = []

            for i in range(2):
                loss += tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels[i], logits=pointers[i])
                equal.append(tf.equal(self.pointers[i], labels[i]))
            self.loss = tf.reduce_mean(loss)
            self.correct = tf.cast(tf.stack(equal), tf.float32)
            self.all_correct = tf.cast(
                tf.equal(tf.reduce_sum(self.correct, axis=0), 2), tf.float32)
            self.exact_match = tf.reduce_mean(self.all_correct)

            self.train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.loss)
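
A hedged usage sketch for Example #8: the class name QAModel and the random batch are illustrative stand-ins (the snippet only shows __init__), but the placeholders and the train_step, loss, and exact_match attributes are the ones defined above; Hp is the snippet's hyperparams module.

import numpy as np
import tensorflow as tf

model = QAModel(batch_size=32, learning_rate=1e-3, load_glove=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.p_word_inputs: np.random.randint(1, 1000, (32, Hp.max_p_words)),
        model.q_word_inputs: np.random.randint(1, 1000, (32, Hp.max_q_words)),
        model.p_word_lengths: np.full(32, Hp.max_p_words, dtype=np.int32),
        model.q_word_lengths: np.full(32, Hp.max_q_words, dtype=np.int32),
        # start and end token positions of the answer within the paragraph
        model.labels: np.random.randint(0, Hp.max_p_words, (32, 2)),
    }
    _, loss, em = sess.run([model.train_step, model.loss, model.exact_match],
                           feed_dict=feed)
    print('loss %.4f  exact match %.4f' % (loss, em))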