Example #1
0
    def __init__(self, data, FLAGS):
        """Build a max-pooling encoder / LSTM-decoder dialogue model graph.

        The dialogue history is embedded, reduced with ``tf.reduce_max``
        (the commented-out ``conv2d`` calls suggest convolutional variants
        were tried and abandoned), and the resulting fixed-size history
        vector is fed as static input to an LSTM decoder that predicts
        action tokens.  Loss (clipped cross-entropy), accuracy, and the
        corresponding TF summaries are attached to ``self``.

        Args:
            data: dataset object; this method reads ``batch_histories``,
                ``batch_actions``, ``idx2word_history`` and
                ``idx2word_action`` from it.
            FLAGS: configuration flags; only ``print_variables`` is read
                here (the rest is consumed by the base class).

        Side effects:
            Sets ``self.predictions``, ``self.loss``, ``self.accuracy`` and
            registers ``loss`` / ``accuracy`` scalar summaries.
            Uses ``self.batch_idx`` and ``self.use_inputs_prob`` provided
            by the base class constructor.
        """
        super(Model, self).__init__(data, FLAGS)

        # Encoder/decoder hyper-parameters (hard-coded for this experiment).
        encoder_embedding_size = 32 * 4
        encoder_vocabulary_length = len(data.idx2word_history)

        decoder_lstm_size = 16 * 2
        decoder_embedding_size = 16 * 2
        decoder_sequence_length = data.batch_actions.shape[2]
        decoder_vocabulary_length = len(data.idx2word_action)

        with tf.name_scope('data'):
            # Whole dataset lives in non-trainable variables; a mini-batch is
            # selected by gathering rows with self.batch_idx.
            batch_histories = tf.Variable(data.batch_histories,
                                          name='histories',
                                          trainable=False)
            batch_actions = tf.Variable(data.batch_actions,
                                        name='actions',
                                        trainable=False)

            histories = tf.gather(batch_histories, self.batch_idx)
            actions = tf.gather(batch_actions, self.batch_idx)

        with tf.name_scope('model'):
            batch_size = tf.shape(histories)[0]

            # Token-id tensor -> embedding lookup.
            # Presumably histories is [batch, utterance, word] so the
            # embedded tensor is 4-D — TODO confirm against `embedding`.
            encoder_embedding = embedding(input=histories,
                                          length=encoder_vocabulary_length,
                                          size=encoder_embedding_size,
                                          name='encoder_embedding')

            with tf.name_scope("UtterancesEncoder"):
                conv3 = encoder_embedding
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                # encoded_utterances = mp_s3
                # Max over axis 2 (the word axis, per the layout above),
                # keeping the dim so the history encoder can reduce again.
                encoded_utterances = tf.reduce_max(conv3, [2], keep_dims=True)

            with tf.name_scope("HistoryEncoder"):
                conv3 = encoded_utterances
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                # Collapse utterance and word axes: one vector per dialogue.
                encoded_history = tf.reduce_max(conv3, [1, 2])

                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_1'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_2'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=decoder_lstm_size * 2,
                #         name='linear_projection_3'
                # )
                # encoded_history = tf.nn.relu(projection)

            with tf.name_scope("Decoder"):
                with tf.name_scope("RNNDecoderCell"):
                    # Decoder input is the previous-action embedding
                    # concatenated with the static encoder vector, hence
                    # the summed input_size.
                    cell = LSTMCell(
                        num_units=decoder_lstm_size,
                        input_size=decoder_embedding_size +
                        encoder_embedding_size,
                        use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoded_history

                # rnn_decoder mixes teacher forcing with model samples via
                # use_inputs_prob (scheduled sampling style — TODO confirm
                # against rnn_decoder's implementation).
                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                    cell=cell,
                    inputs=[
                        actions[:, word]
                        for word in range(decoder_sequence_length)
                    ],
                    static_input=final_encoder_state,
                    initial_state=initial_state,  # final_encoder_state,
                    embedding_size=decoder_embedding_size,
                    embedding_length=decoder_vocabulary_length,
                    sequence_length=decoder_sequence_length,
                    name='RNNDecoder',
                    reuse=False,
                    use_inputs_prob=self.use_inputs_prob)

                # Old-style tf.concat(axis, values): join per-step softmaxes
                # along axis 1 (the time axis).
                self.predictions = tf.concat(1, decoder_outputs_softmax)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(actions,
                                              decoder_vocabulary_length)
            # Cross-entropy; predictions clipped to avoid log(0).
            self.loss = tf.reduce_mean(
                -one_hot_labels *
                tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)),
                name='loss')
            tf.scalar_summary('loss', self.loss)

        with tf.name_scope('accuracy'):
            # Per-token accuracy: argmax over the vocabulary axis (2).
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2),
                                          tf.argmax(self.predictions, 2))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   'float'))
            tf.scalar_summary('accuracy', self.accuracy)
Example #2
0
    def __init__(self, data, FLAGS):
        """Build a hierarchical bi-RNN encoder / LSTM-decoder dialogue model.

        Each utterance is encoded word-by-word with a bidirectional LSTM
        followed by a forward LSTM; the per-utterance states are then
        encoded along the utterance axis by a second bi-RNN + forward
        LSTM stack, and the final history state conditions an LSTM
        decoder over action tokens.  Loss (clipped cross-entropy),
        accuracy, and the corresponding TF summaries are attached to
        ``self``.

        Args:
            data: dataset object; this method reads ``batch_histories``,
                ``batch_actions``, ``train_set['histories']``,
                ``idx2word_history`` and ``idx2word_action`` from it.
            FLAGS: configuration flags; only ``print_variables`` is read
                here (the rest is consumed by the base class).

        Side effects:
            Sets ``self.predictions``, ``self.loss``, ``self.accuracy`` and
            registers ``loss`` / ``accuracy`` scalar summaries.
            Uses ``self.batch_idx`` and ``self.use_inputs_prob`` provided
            by the base class constructor.
        """
        super(Model, self).__init__(data, FLAGS)

        # Encoder/decoder hyper-parameters (hard-coded for this experiment).
        encoder_embedding_size = 16 * 2
        encoder_lstm_size = 16
        encoder_vocabulary_length = len(data.idx2word_history)
        history_length = data.train_set['histories'].shape[1]
        encoder_sequence_length = data.train_set['histories'].shape[2]

        decoder_lstm_size = 16
        decoder_embedding_size = 16
        decoder_sequence_length = data.batch_actions.shape[2]
        decoder_vocabulary_length = len(data.idx2word_action)

        with tf.name_scope('data'):
            # Whole dataset lives in non-trainable variables; a mini-batch is
            # selected by gathering rows with self.batch_idx.
            batch_histories = tf.Variable(data.batch_histories,
                                          name='histories',
                                          trainable=False)
            batch_actions = tf.Variable(data.batch_actions,
                                        name='actions',
                                        trainable=False)

            histories = tf.gather(batch_histories, self.batch_idx)
            actions = tf.gather(batch_actions, self.batch_idx)

        # inference model
        with tf.name_scope('model'):
            batch_size = tf.shape(histories)[0]

            encoder_embedding = embedding(input=histories,
                                          length=encoder_vocabulary_length,
                                          size=encoder_embedding_size,
                                          name='encoder_embedding')

            with tf.name_scope("UtterancesEncoder"):
                with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                    cell_fw_1 = LSTMCell(num_units=encoder_lstm_size,
                                         input_size=encoder_embedding_size,
                                         use_peepholes=True)
                    initial_state_fw_1 = cell_fw_1.zero_state(
                        batch_size, tf.float32)

                with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                    cell_bw_1 = LSTMCell(num_units=encoder_lstm_size,
                                         input_size=encoder_embedding_size,
                                         use_peepholes=True)
                    initial_state_bw_1 = cell_bw_1.zero_state(
                        batch_size, tf.float32)

                with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                    # Second layer consumes the concatenated fw+bw outputs.
                    cell_fw_2 = LSTMCell(num_units=encoder_lstm_size,
                                         input_size=cell_fw_1.output_size +
                                         cell_bw_1.output_size,
                                         use_peepholes=True)
                    initial_state_fw_2 = cell_fw_2.zero_state(
                        batch_size, tf.float32)

                # the input data has this dimensions
                # [
                #   #batch,
                #   #utterance in a history (a dialogue),
                #   #word in an utterance (a sentence),
                #   embedding dimension
                # ]

                # encode all utterances along the word axis
                encoder_states_2d = []

                for utterance in range(history_length):
                    encoder_outputs, _ = brnn(
                        cell_fw=cell_fw_1,
                        cell_bw=cell_bw_1,
                        inputs=[
                            encoder_embedding[:, utterance, word, :]
                            for word in range(encoder_sequence_length)
                        ],
                        initial_state_fw=initial_state_fw_1,
                        initial_state_bw=initial_state_bw_1,
                        name='RNNUtteranceBidirectionalLayer',
                        # Share weights across utterances after the first.
                        reuse=True if utterance > 0 else None)

                    _, encoder_states = rnn(
                        cell=cell_fw_2,
                        inputs=encoder_outputs,
                        initial_state=initial_state_fw_2,
                        name='RNNUtteranceForwardEncoder',
                        reuse=True if utterance > 0 else None)

                    # Keep only the final state of each utterance, with a
                    # singleton utterance axis for later concatenation.
                    encoder_states = tf.concat(
                        1, tf.expand_dims(encoder_states[-1], 1))
                    encoder_states_2d.append(encoder_states)

                # [batch, history_length, state] tensor of utterance states.
                encoder_states_2d = tf.concat(1, encoder_states_2d)

            with tf.name_scope("HistoryEncoder"):
                # encode all histories along the utterance axis
                with tf.name_scope("RNNForwardHistoryEncoderCell_1"):
                    cell_fw_1 = LSTMCell(num_units=encoder_lstm_size,
                                         input_size=cell_fw_2.state_size,
                                         use_peepholes=True)
                    initial_state_fw_1 = cell_fw_1.zero_state(
                        batch_size, tf.float32)

                with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                    cell_bw_1 = LSTMCell(num_units=encoder_lstm_size,
                                         input_size=cell_fw_2.state_size,
                                         use_peepholes=True)
                    # FIX: was cell_fw_2.zero_state — the backward cell's
                    # initial state must come from cell_bw_1.  (Numerically
                    # identical here only because both cells share
                    # num_units, but the wrong cell was referenced.)
                    initial_state_bw_1 = cell_bw_1.zero_state(
                        batch_size, tf.float32)

                with tf.name_scope("RNNForwardHistoryEncoderCell_2"):
                    cell_fw_2 = LSTMCell(num_units=encoder_lstm_size,
                                         input_size=cell_fw_1.output_size +
                                         cell_bw_1.output_size,
                                         use_peepholes=True)
                    initial_state_fw_2 = cell_fw_2.zero_state(
                        batch_size, tf.float32)

                encoder_outputs, _ = brnn(
                    cell_fw=cell_fw_1,
                    cell_bw=cell_bw_1,
                    inputs=[
                        encoder_states_2d[:, utterance, :]
                        for utterance in range(history_length)
                    ],
                    initial_state_fw=initial_state_fw_1,
                    initial_state_bw=initial_state_bw_1,
                    name='RNNHistoryBidirectionalLayer',
                    reuse=None)

                _, encoder_states = rnn(cell=cell_fw_2,
                                        inputs=encoder_outputs,
                                        initial_state=initial_state_fw_2,
                                        name='RNNHistoryForwardEncoder',
                                        reuse=None)

            with tf.name_scope("Decoder"):
                with tf.name_scope("RNNDecoderCell"):
                    # Decoder input is the previous-action embedding
                    # concatenated with the static history state.
                    cell = LSTMCell(
                        num_units=decoder_lstm_size,
                        input_size=decoder_embedding_size +
                        cell_fw_2.state_size,
                        use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoder_states[-1]

                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                    cell=cell,
                    inputs=[
                        actions[:, word]
                        for word in range(decoder_sequence_length)
                    ],
                    static_input=final_encoder_state,
                    initial_state=initial_state,  #final_encoder_state,
                    embedding_size=decoder_embedding_size,
                    embedding_length=decoder_vocabulary_length,
                    sequence_length=decoder_sequence_length,
                    name='RNNDecoder',
                    reuse=False,
                    use_inputs_prob=self.use_inputs_prob)

                # Old-style tf.concat(axis, values): join per-step softmaxes
                # along the time axis.
                self.predictions = tf.concat(1, decoder_outputs_softmax)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(actions,
                                              decoder_vocabulary_length)
            # Cross-entropy; predictions clipped to avoid log(0).
            self.loss = tf.reduce_mean(
                -one_hot_labels *
                tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)),
                name='loss')
            tf.scalar_summary('loss', self.loss)

        with tf.name_scope('accuracy'):
            # Per-token accuracy: argmax over the vocabulary axis (2).
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2),
                                          tf.argmax(self.predictions, 2))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   'float'))
            tf.scalar_summary('accuracy', self.accuracy)
Example #3
0
    def __init__(self, data, FLAGS):
        """Build a max-pooling encoder / LSTM-decoder dialogue model graph.

        The dialogue history is embedded, reduced with ``tf.reduce_max``
        (the commented-out ``conv2d`` calls suggest convolutional variants
        were tried and abandoned), and the resulting fixed-size history
        vector is fed as static input to an LSTM decoder that predicts
        action tokens.  Loss (clipped cross-entropy), accuracy, and the
        corresponding TF summaries are attached to ``self``.

        Args:
            data: dataset object; this method reads ``batch_histories``,
                ``batch_actions``, ``idx2word_history`` and
                ``idx2word_action`` from it.
            FLAGS: configuration flags; only ``print_variables`` is read
                here (the rest is consumed by the base class).

        Side effects:
            Sets ``self.predictions``, ``self.loss``, ``self.accuracy`` and
            registers ``loss`` / ``accuracy`` scalar summaries.
            Uses ``self.batch_idx`` and ``self.use_inputs_prob`` provided
            by the base class constructor.
        """
        super(Model, self).__init__(data, FLAGS)

        # Encoder/decoder hyper-parameters (hard-coded for this experiment).
        encoder_embedding_size = 32 * 4
        encoder_vocabulary_length = len(data.idx2word_history)

        decoder_lstm_size = 16 * 2
        decoder_embedding_size = 16 * 2
        decoder_sequence_length = data.batch_actions.shape[2]
        decoder_vocabulary_length = len(data.idx2word_action)

        with tf.name_scope('data'):
            # Whole dataset lives in non-trainable variables; a mini-batch is
            # selected by gathering rows with self.batch_idx.
            batch_histories = tf.Variable(data.batch_histories, name='histories', trainable=False)
            batch_actions = tf.Variable(data.batch_actions, name='actions', trainable=False)

            histories = tf.gather(batch_histories, self.batch_idx)
            actions = tf.gather(batch_actions, self.batch_idx)

        with tf.name_scope('model'):
            batch_size = tf.shape(histories)[0]

            # Token-id tensor -> embedding lookup.
            # Presumably histories is [batch, utterance, word] so the
            # embedded tensor is 4-D — TODO confirm against `embedding`.
            encoder_embedding = embedding(
                input=histories,
                length=encoder_vocabulary_length,
                size=encoder_embedding_size,
                name='encoder_embedding'
            )

            with tf.name_scope("UtterancesEncoder"):
                conv3 = encoder_embedding
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                # encoded_utterances = mp_s3
                # Max over axis 2 (the word axis, per the layout above),
                # keeping the dim so the history encoder can reduce again.
                encoded_utterances = tf.reduce_max(conv3, [2], keep_dims=True)

            with tf.name_scope("HistoryEncoder"):
                conv3 = encoded_utterances
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                # Collapse utterance and word axes: one vector per dialogue.
                encoded_history = tf.reduce_max(conv3, [1, 2])

                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_1'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_2'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=decoder_lstm_size * 2,
                #         name='linear_projection_3'
                # )
                # encoded_history = tf.nn.relu(projection)

            with tf.name_scope("Decoder"):
                with tf.name_scope("RNNDecoderCell"):
                    # Decoder input is the previous-action embedding
                    # concatenated with the static encoder vector, hence
                    # the summed input_size.
                    cell = LSTMCell(
                        num_units=decoder_lstm_size,
                        input_size=decoder_embedding_size + encoder_embedding_size,
                        use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoded_history

                # rnn_decoder mixes teacher forcing with model samples via
                # use_inputs_prob (scheduled sampling style — TODO confirm
                # against rnn_decoder's implementation).
                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                    cell=cell,
                    inputs=[actions[:, word] for word in range(decoder_sequence_length)],
                    static_input=final_encoder_state,
                    initial_state=initial_state,  # final_encoder_state,
                    embedding_size=decoder_embedding_size,
                    embedding_length=decoder_vocabulary_length,
                    sequence_length=decoder_sequence_length,
                    name='RNNDecoder',
                    reuse=False,
                    use_inputs_prob=self.use_inputs_prob
                )

                # Old-style tf.concat(axis, values): join per-step softmaxes
                # along the time axis.
                self.predictions = tf.concat(1, decoder_outputs_softmax)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(actions, decoder_vocabulary_length)
            # Cross-entropy; predictions clipped to avoid log(0).
            self.loss = tf.reduce_mean(- one_hot_labels * tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)),
                                       name='loss')
            tf.scalar_summary('loss', self.loss)

        with tf.name_scope('accuracy'):
            # Per-token accuracy: argmax over the vocabulary axis (2).
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(self.predictions, 2))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            tf.scalar_summary('accuracy', self.accuracy)
Example #4
0
    def __init__(self, data, FLAGS):
        """Build a hierarchical bi-RNN encoder / LSTM-decoder dialogue model.

        Each utterance is encoded word-by-word with a bidirectional LSTM
        followed by a forward LSTM; the per-utterance states are then
        encoded along the utterance axis by a second bi-RNN + forward
        LSTM stack, and the final history state conditions an LSTM
        decoder over action tokens.  Loss (clipped cross-entropy),
        accuracy, and the corresponding TF summaries are attached to
        ``self``.

        Args:
            data: dataset object; this method reads ``batch_histories``,
                ``batch_actions``, ``train_set['histories']``,
                ``idx2word_history`` and ``idx2word_action`` from it.
            FLAGS: configuration flags; only ``print_variables`` is read
                here (the rest is consumed by the base class).

        Side effects:
            Sets ``self.predictions``, ``self.loss``, ``self.accuracy`` and
            registers ``loss`` / ``accuracy`` scalar summaries.
            Uses ``self.batch_idx`` and ``self.use_inputs_prob`` provided
            by the base class constructor.
        """
        super(Model, self).__init__(data, FLAGS)

        # Encoder/decoder hyper-parameters (hard-coded for this experiment).
        encoder_embedding_size = 16 * 2
        encoder_lstm_size = 16
        encoder_vocabulary_length = len(data.idx2word_history)
        history_length = data.train_set['histories'].shape[1]
        encoder_sequence_length = data.train_set['histories'].shape[2]

        decoder_lstm_size = 16
        decoder_embedding_size = 16
        decoder_sequence_length = data.batch_actions.shape[2]
        decoder_vocabulary_length = len(data.idx2word_action)

        with tf.name_scope('data'):
            # Whole dataset lives in non-trainable variables; a mini-batch is
            # selected by gathering rows with self.batch_idx.
            batch_histories = tf.Variable(data.batch_histories, name='histories', trainable=False)
            batch_actions = tf.Variable(data.batch_actions, name='actions', trainable=False)

            histories = tf.gather(batch_histories, self.batch_idx)
            actions = tf.gather(batch_actions, self.batch_idx)

        # inference model
        with tf.name_scope('model'):
            batch_size = tf.shape(histories)[0]

            encoder_embedding = embedding(
                    input=histories,
                    length=encoder_vocabulary_length,
                    size=encoder_embedding_size,
                    name='encoder_embedding'
            )

            with tf.name_scope("UtterancesEncoder"):
                with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                    cell_fw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=encoder_embedding_size,
                            use_peepholes=True
                    )
                    initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                    cell_bw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=encoder_embedding_size,
                            use_peepholes=True
                    )
                    initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                    # Second layer consumes the concatenated fw+bw outputs.
                    cell_fw_2 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                            use_peepholes=True
                    )
                    initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

                # the input data has this dimensions
                # [
                #   #batch,
                #   #utterance in a history (a dialogue),
                #   #word in an utterance (a sentence),
                #   embedding dimension
                # ]

                # encode all utterances along the word axis
                encoder_states_2d = []

                for utterance in range(history_length):
                    encoder_outputs, _ = brnn(
                            cell_fw=cell_fw_1,
                            cell_bw=cell_bw_1,
                            inputs=[encoder_embedding[:, utterance, word, :] for word in
                                    range(encoder_sequence_length)],
                            initial_state_fw=initial_state_fw_1,
                            initial_state_bw=initial_state_bw_1,
                            name='RNNUtteranceBidirectionalLayer',
                            # Share weights across utterances after the first.
                            reuse=True if utterance > 0 else None
                    )

                    _, encoder_states = rnn(
                            cell=cell_fw_2,
                            inputs=encoder_outputs,
                            initial_state=initial_state_fw_2,
                            name='RNNUtteranceForwardEncoder',
                            reuse=True if utterance > 0 else None
                    )

                    # Keep only the final state of each utterance, with a
                    # singleton utterance axis for later concatenation.
                    encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                    encoder_states_2d.append(encoder_states)

                # [batch, history_length, state] tensor of utterance states.
                encoder_states_2d = tf.concat(1, encoder_states_2d)

            with tf.name_scope("HistoryEncoder"):
                # encode all histories along the utterance axis
                with tf.name_scope("RNNForwardHistoryEncoderCell_1"):
                    cell_fw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_2.state_size,
                            use_peepholes=True
                    )
                    initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                    cell_bw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_2.state_size,
                            use_peepholes=True
                    )
                    # FIX: was cell_fw_2.zero_state — the backward cell's
                    # initial state must come from cell_bw_1.  (Numerically
                    # identical here only because both cells share
                    # num_units, but the wrong cell was referenced.)
                    initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNForwardHistoryEncoderCell_2"):
                    cell_fw_2 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                            use_peepholes=True
                    )
                    initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

                encoder_outputs, _ = brnn(
                        cell_fw=cell_fw_1,
                        cell_bw=cell_bw_1,
                        inputs=[encoder_states_2d[:, utterance, :] for utterance in range(history_length)],
                        initial_state_fw=initial_state_fw_1,
                        initial_state_bw=initial_state_bw_1,
                        name='RNNHistoryBidirectionalLayer',
                        reuse=None
                )

                _, encoder_states = rnn(
                        cell=cell_fw_2,
                        inputs=encoder_outputs,
                        initial_state=initial_state_fw_2,
                        name='RNNHistoryForwardEncoder',
                        reuse=None
                )

            with tf.name_scope("Decoder"):
                with tf.name_scope("RNNDecoderCell"):
                    # Decoder input is the previous-action embedding
                    # concatenated with the static history state.
                    cell = LSTMCell(
                            num_units=decoder_lstm_size,
                            input_size=decoder_embedding_size+cell_fw_2.state_size,
                            use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoder_states[-1]

                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                        cell=cell,
                        inputs=[actions[:, word] for word in range(decoder_sequence_length)],
                        static_input=final_encoder_state,
                        initial_state=initial_state, #final_encoder_state,
                        embedding_size=decoder_embedding_size,
                        embedding_length=decoder_vocabulary_length,
                        sequence_length=decoder_sequence_length,
                        name='RNNDecoder',
                        reuse=False,
                        use_inputs_prob=self.use_inputs_prob
                )

                # Old-style tf.concat(axis, values): join per-step softmaxes
                # along the time axis.
                self.predictions = tf.concat(1, decoder_outputs_softmax)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(actions, decoder_vocabulary_length)
            # Cross-entropy; predictions clipped to avoid log(0).
            self.loss = tf.reduce_mean(- one_hot_labels * tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)), name='loss')
            tf.scalar_summary('loss', self.loss)

        with tf.name_scope('accuracy'):
            # Per-token accuracy: argmax over the vocabulary axis (2).
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(self.predictions, 2))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            tf.scalar_summary('accuracy', self.accuracy)