Ejemplo n.º 1
0
def build_cell(num_units,
               num_layers,
               is_train,
               cell_type,
               dropout=0.0,
               forget_bias=0.0,
               use_residual=False,
               dim_project=None):
    """Create ``num_layers`` cells and combine them into one RNN cell.

    Every layer is produced by ``single_cell`` with the same arguments,
    inside a ``tf.name_scope`` named after ``cell_type``.

    Args:
        num_units: forwarded to ``single_cell``.
        num_layers: how many cells to create.
        is_train: forwarded to ``single_cell``.
        cell_type: forwarded to ``single_cell``; also used as the name scope.
        dropout: forwarded to ``single_cell``.
        forget_bias: forwarded to ``single_cell``.
        use_residual: when truthy, every cell except the first one is
            wrapped in ``ResidualWrapper``.
        dim_project: forwarded to ``single_cell``.

    Returns:
        A ``MultiRNNCell`` over all layers when ``num_layers > 1``,
        otherwise the first (only) cell itself.
    """
    layers = []
    with tf.name_scope(cell_type):
        for _ in range(num_layers):
            layers.append(
                single_cell(num_units=num_units,
                            is_train=is_train,
                            cell_type=cell_type,
                            dropout=dropout,
                            forget_bias=forget_bias,
                            dim_project=dim_project))

    # The first layer is left unwrapped; residual wrapping starts at layer 1.
    if use_residual:
        layers[1:] = [ResidualWrapper(cell) for cell in layers[1:]]

    if num_layers > 1:
        return MultiRNNCell(layers)
    return layers[0]
Ejemplo n.º 2
0
    def __init__(self,
                 rnn_layers,
                 seq_length,
                 dynamic=False,
                 bidirectional=False):
        """Stack several RNN layers into a single multi-layer cell.

        Parameters
        ----------
            rnn_layers : list
                RNN layers to stack. Each one must expose ``cells``,
                ``get_params()`` and ``get_params_dict()``.
            seq_length : int
                Maximum length of the input sequences.
            dynamic : boolean
                Selects between a dynamic and a static RNN. A static RNN is
                unrolled into a fixed graph and always runs over every time
                step. A dynamic RNN is built `in a while loop`, so it can
                process sequences of variable length, but the list of
                sequence lengths has to be provided. An API for using
                dynamic RNNs is currently not provided.
            bidirectional : boolean
                Whether the layer is bidirectional.
        """
        self.rnn_layers = rnn_layers
        self.rnn_cells = [layer.cells for layer in rnn_layers]

        self.seq_length = seq_length
        self.dynamic = dynamic
        self.bidirectional = bidirectional

        self.stacked_cells = MultiRNNCell(cells=self.rnn_cells)
        self.cell_type = CellType.get_cell_type(bidirectional, dynamic)

        # Gather the parameters of all stacked layers, both as a flat list
        # and as a name -> parameter mapping.
        self.params = []
        self.named_params_dict = {}
        for stacked_layer in rnn_layers:
            self.params += stacked_layer.get_params()
            self.named_params_dict.update(stacked_layer.get_params_dict())
Ejemplo n.º 3
0
 def decoder(self, decoder_inputs, enc_output, enc_states,
             target_sequence_length):
     """Build the decoder RNN and return its outputs.

     One cell is created per entry of ``self.hidden_layer_size``. The kind
     of cell comes from ``self.hidden_layer_type[i]`` ("tanh", "lstm" or
     "gru") and its width from ``self.encoder_layer_size[i]``. An entry
     with any other type string contributes no cell.

     Args:
         decoder_inputs: inputs fed to the bidirectional RNN. When attention
             is enabled, the size of its last static dimension is used as
             the attention size.
         enc_output: encoder outputs; only used (as attention memory) when
             ``self.attention`` is truthy.
         enc_states: indexable pair of states; ``enc_states[0]`` and
             ``enc_states[1]`` initialise the forward and backward passes
             when attention is disabled.
         target_sequence_length: sequence lengths handed to the RNN
             (no attention) or to the attention mechanism (attention).

     Returns:
         The first element returned by ``tf.nn.bidirectional_dynamic_rnn``.
     """
     with tf.variable_scope("decoder"):
         layer_cells = []
         # `range` instead of the Python-2-only `xrange`, so the method also
         # runs on Python 3 without a compatibility shim.
         for idx in range(len(self.hidden_layer_size)):
             layer_type = self.hidden_layer_type[idx]
             if layer_type == "tanh":
                 layer_cells.append(
                     tf.contrib.rnn.BasicRNNCell(
                         num_units=self.encoder_layer_size[idx]))
             elif layer_type == "lstm":
                 layer_cells.append(
                     tf.contrib.rnn.BasicLSTMCell(
                         num_units=self.encoder_layer_size[idx]))
             elif layer_type == "gru":
                 layer_cells.append(
                     GRUCell(num_units=self.encoder_layer_size[idx]))
         multicell = MultiRNNCell(layer_cells)

     # NOTE(review): the same cell object is used for both directions in
     # both branches below -- confirm that this is intended.
     if not self.attention:
         dec_output, _ = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=multicell,
             cell_bw=multicell,
             inputs=decoder_inputs,
             initial_state_fw=enc_states[0],
             sequence_length=target_sequence_length,
             initial_state_bw=enc_states[1])
     else:
         attention_size = decoder_inputs.get_shape().as_list()[-1]
         # NOTE(review): target_sequence_length is passed positionally as
         # the third argument of BahdanauAttention -- verify it is the
         # length the attention memory should be masked with.
         attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
             attention_size,
             enc_output,
             target_sequence_length,
             normalize=True,
             probability_fn=tf.nn.softmax)
         cell_with_attention = tf.contrib.seq2seq.AttentionWrapper(
             multicell, attention_mechanism, attention_size)
         dec_output, _ = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=cell_with_attention,
             cell_bw=cell_with_attention,
             inputs=decoder_inputs,
             dtype=tf.float32)
     return dec_output
Ejemplo n.º 4
0
class CRPolicy(tf.keras.Model):
    """Convolutional-recurrent model with a policy output and a value output.

    Observations go through three Conv2D(32, 3) + MaxPool2D stages, are
    flattened and projected to 128 features, then run through a two-layer
    GRU (128 units each). Two separate Dense(100) branches feed the policy
    output (``n_actions`` units) and the value output (1 unit).
    """

    def __init__(self, n_actions):
        """Create all layers; ``n_actions`` is the width of the policy output."""
        super(CRPolicy, self).__init__()

        def hidden_init():
            # A fresh initializer per layer, exactly as if written inline.
            return orthogonal(np.sqrt(2))

        gru_layers = [
            GRUCell(128, kernel_initializer=hidden_init()) for _ in range(2)
        ]
        self.gru = MultiRNNCell(gru_layers)
        # Zero recurrent state for a batch of size 1.
        self.s0 = self.gru.zero_state(batch_size=1, dtype=tf.float32)

        self.cv1 = Conv2D(32, 3, activation='relu',
                          kernel_initializer=hidden_init())
        self.mp1 = MaxPool2D()
        self.cv2 = Conv2D(32, 3, activation='relu',
                          kernel_initializer=hidden_init())
        self.mp2 = MaxPool2D()
        self.cv3 = Conv2D(32, 3, activation='relu',
                          kernel_initializer=hidden_init())
        self.mp3 = MaxPool2D()
        self.flatten = Flatten()

        self.fc1 = Dense(128, activation='relu',
                         kernel_initializer=hidden_init())
        self.fc2 = Dense(100, activation='relu',
                         kernel_initializer=hidden_init())
        self.fc3 = Dense(100, activation='relu',
                         kernel_initializer=hidden_init())

        self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
        self.val = Dense(1, kernel_initializer=orthogonal(1))

    def call(self, obs, state):
        """Run the network on ``obs`` starting from recurrent ``state``.

        Returns a tuple ``(policy_output, value_output, new_state)``.
        """
        features = tf.constant(obs, dtype=tf.float32)
        feature_stack = (self.cv1, self.mp1, self.cv2, self.mp2, self.cv3,
                         self.mp3, self.flatten, self.fc1)
        for layer in feature_stack:
            features = layer(features)

        # Add a leading axis of size 1 before the RNN (presumably the rows
        # of `obs` are treated as the time steps of one sequence -- confirm).
        sequence = tf.expand_dims(features, axis=0)
        rnn_out, state = dynamic_rnn(self.gru, sequence, initial_state=state)
        rnn_out = tf.reshape(rnn_out, shape=[-1, 128])

        policy_hidden = self.fc2(rnn_out)
        value_hidden = self.fc3(rnn_out)
        return self.pol(policy_hidden), self.val(value_hidden), state
Ejemplo n.º 5
0
    def __init__(self, state_size, num_layers, dropout_prob, base_cell):
        """Build the wrapped cell from a ``tf.contrib.rnn`` cell class name.

        Args:
            state_size: number of units in each cell.
            num_layers: number of cells; anything other than 1 produces a
                MultiRNNCell over that many cells.
            dropout_prob: probability of a node being dropped. It is only
                stored here; this constructor does not apply it.
            base_cell: (str) name of the underlying cell class to look up
                in tf.contrib.rnn (e.g. 'GRUCell').
        """
        self._state_size = state_size
        self._num_layers = num_layers
        self._dropout_prob = dropout_prob
        self._base_cell = base_cell

        def make_cell():
            """Resolve the class named by `base_cell` and instantiate it."""
            cell_cls = getattr(tf.contrib.rnn, base_cell)
            return cell_cls(num_units=state_size)

        if num_layers != 1:
            self._cell = MultiRNNCell(
                [make_cell() for _ in range(num_layers)])
        else:
            self._cell = make_cell()
Ejemplo n.º 6
0
def build_rnn_layers(cell_type,
                     num_units_per_layer,
                     use_dropout,
                     dropout_probability,
                     mode,
                     dtype,
                     residual_connections=False,
                     highway_connections=False,
                     as_list=False):
    """Create one RNN cell per entry of ``num_units_per_layer``.

    Each cell comes from ``_build_single_cell``. Every layer after the first
    is wrapped in ``HighwayWrapper`` when ``highway_connections is True``,
    otherwise in ``ResidualWrapper`` when ``residual_connections is True``.
    Both flags are compared by identity with ``True``, so truthy values
    other than ``True`` do not enable wrapping.

    Returns:
        * the single cell itself when exactly one layer was requested;
        * a ``MultiRNNCell`` over the cells when ``as_list is False``;
        * otherwise the plain Python list of cells.
    """
    cells = []
    for index, units in enumerate(num_units_per_layer):
        layer_cell = _build_single_cell(
            cell_type=cell_type,
            num_units=units,
            use_dropout=use_dropout,
            dropout_probability=dropout_probability,
            mode=mode,
            dtype=dtype,
        )

        # Skip connections are never applied to the first layer; highway
        # wrapping takes precedence over residual wrapping.
        if index > 0:
            if highway_connections is True:
                layer_cell = HighwayWrapper(layer_cell)
            elif residual_connections is True:
                layer_cell = ResidualWrapper(layer_cell)

        cells.append(layer_cell)

    if len(cells) == 1:
        return cells[0]
    if as_list is False:
        return MultiRNNCell(cells)
    return cells
Ejemplo n.º 7
0
    def _net(self):
        """Build the two-source network: stacked GRU, dense heads, masking.

        ``self.x_mixed`` is run through ``self.n_layer`` stacked GRU cells of
        ``self.hidden_size`` units. Two ReLU dense heads produce one estimate
        per source, and each estimate is turned into a ratio mask that is
        applied to ``self.x_mixed``.

        Returns:
            Tuple ``(y_tilde_src1, y_tilde_src2)`` of masked estimates.
        """
        # RNN and dense layers
        stacked_gru = MultiRNNCell(
            [GRUCell(self.hidden_size) for _ in range(self.n_layer)])
        rnn_out, _ = tf.nn.dynamic_rnn(stacked_gru,
                                       self.x_mixed,
                                       dtype=tf.float32)
        n_features = shape(self.x_mixed)[2]
        y_hat_src1, y_hat_src2 = [
            tf.layers.dense(inputs=rnn_out,
                            units=n_features,
                            activation=tf.nn.relu,
                            name=head_name)
            for head_name in ('y_hat_src1', 'y_hat_src2')
        ]

        # time-freq masking layer; eps keeps the denominator non-zero
        eps = np.finfo(float).eps
        mask_src1 = y_hat_src1 / (y_hat_src1 + y_hat_src2 + eps)
        y_tilde_src1 = mask_src1 * self.x_mixed
        mask_src2 = y_hat_src2 / (y_hat_src1 + y_hat_src2 + eps)
        y_tilde_src2 = mask_src2 * self.x_mixed

        return y_tilde_src1, y_tilde_src2
def training_decode(enc_outputs, seq_len, helper, out_dim):
    """Build the training-time attention decoder over ``enc_outputs``.

    Args:
        enc_outputs: encoder outputs used as attention memory; its first
            dynamic dimension is used as the batch size.
        seq_len: passed to the attention mechanism as
            ``memory_sequence_length``.
        helper: helper passed to ``BasicDecoder``.
        out_dim: size of the final output projection.

    Returns:
        Tuple ``(outputs, alignments)``: the decoder outputs and the stacked
        attention alignment history transposed with permutation [1, 2, 0].
    """
    prenet_cell = DecoderPrenetWrapper(GRUCell(hp.embed_size),
                                       is_training=True,
                                       prenet_sizes=hp.embed_size,
                                       dropout_prob=hp.dropout)
    attention = BahdanauAttention(hp.embed_size,
                                  enc_outputs,
                                  normalize=True,
                                  memory_sequence_length=seq_len,
                                  probability_fn=tf.nn.softmax)
    attention_cell = AttentionWrapper(prenet_cell,
                                      attention,
                                      alignment_history=True,
                                      output_attention=False)
    concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)

    # Projected attention cell followed by two residual GRU layers.
    stacked_layers = [
        OutputProjectionWrapper(concat_cell, hp.embed_size),
        ResidualWrapper(GRUCell(hp.embed_size)),
        ResidualWrapper(GRUCell(hp.embed_size)),
    ]
    decoder_cell = MultiRNNCell(stacked_layers, state_is_tuple=True)

    output_cell = OutputProjectionWrapper(decoder_cell, out_dim)
    batch_size = tf.shape(enc_outputs)[0]
    initial_state = output_cell.zero_state(batch_size=batch_size,
                                           dtype=tf.float32)

    decoder = BasicDecoder(cell=output_cell,
                           helper=helper,
                           initial_state=initial_state)

    decode_result = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder, maximum_iterations=hp.max_len)
    (outputs, _), last_state, _ = decode_result

    # for attention plot: the history is read from the first stacked layer
    history = last_state[0].alignment_history.stack()
    alignments = tf.transpose(history, [1, 2, 0])
    return outputs, alignments
Ejemplo n.º 9
0
def decoder_rnn(decoder_embedded_inputs, decoder_embedding_matrix,
                encoder_state, num_words, sequence_length, rnn_size,
                num_of_layers, sos_id, eos_id, keep_prob, batch_size):
    """Build the decoder and return its training and test predictions.

    A stack of ``num_of_layers`` LSTM cells (``rnn_size`` units each, with
    input dropout ``keep_prob``) is created inside the "decoding" variable
    scope. The same stack and the same fully connected output layer are
    used for ``decode_training_set`` and, after the scope is switched to
    variable reuse, for ``decode_test_set``.

    Args:
        decoder_embedded_inputs: forwarded to ``decode_training_set``.
        decoder_embedding_matrix: forwarded to ``decode_test_set``.
        encoder_state: forwarded to both decode functions.
        num_words: number of units of the output layer.
        sequence_length: forwarded to ``decode_training_set``;
            ``sequence_length - 1`` is forwarded to ``decode_test_set``.
        rnn_size: number of units of every LSTM cell.
        num_of_layers: number of stacked LSTM layers.
        sos_id: forwarded to ``decode_test_set``.
        eos_id: forwarded to ``decode_test_set``.
        keep_prob: input keep probability of the dropout wrapper; also
            forwarded to both decode functions.
        batch_size: forwarded to both decode functions.

    Returns:
        Tuple ``(training_prediction, test_predictions)``.
    """
    # perform on decoding scope
    with tf.variable_scope("decoding") as decoding_scope:

        def make_layer():
            # One distinct cell object per layer. Repeating a single object
            # (`[lstm] * n`) makes all layers share one set of weights, or
            # raises, on TensorFlow >= 1.1.
            return DropoutWrapper(BasicLSTMCell(rnn_size),
                                  input_keep_prob=keep_prob)

        cell = MultiRNNCell([make_layer() for _ in range(num_of_layers)])

        # Initialize weights and biases
        weights = tf.truncated_normal_initializer(stddev=0.1)
        biases = tf.zeros_initializer()

        # define output function
        def output_function(x):
            return tf.contrib.layers.fully_connected(
                x,
                num_words,
                None,
                scope=decoding_scope,
                weights_initializer=weights,
                biases_initializer=biases)

        training_prediction = decode_training_set(
            encoder_state, cell, decoder_embedded_inputs, sequence_length,
            decoding_scope, output_function, keep_prob, batch_size)

        # Everything built from here on reuses the variables created above.
        decoding_scope.reuse_variables()
        test_predictions = decode_test_set(encoder_state, cell,
                                           decoder_embedding_matrix, sos_id,
                                           eos_id, sequence_length - 1,
                                           num_words, decoding_scope,
                                           output_function, keep_prob,
                                           batch_size)

    return training_prediction, test_predictions
Ejemplo n.º 10
0
    def _dynamic_birnn(self, x, seq_len, batch_size, max_seq_len):
        """Run a stacked bidirectional GRU over ``x`` and classify the result.

        The forward and backward outputs are averaged. Depending on
        ``self.avg_output`` the sequence is then reduced either to the
        output at step ``seq_len - 1`` of every sequence or to the sum over
        time divided by ``seq_len``. Two dense layers (1000 units with leaky
        ReLU, then ``self.n_class`` units) produce the returned tensor.
        """

        def stacked_gru():
            # One GRU layer per entry of self.cell_hidden.
            return MultiRNNCell([GRUCell(units) for units in self.cell_hidden])

        cell_fw = stacked_gru()
        cell_bw = stacked_gru()
        init_state_fw = cell_fw.zero_state(batch_size, dtype=tf.float32)
        init_state_bw = cell_bw.zero_state(batch_size, dtype=tf.float32)

        (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=x,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            sequence_length=seq_len
        )

        # The two directions are averaged rather than concatenated, so the
        # feature size stays at self.cell_hidden[-1].
        outputs = (out_fw + out_bw) / 2

        if self.avg_output:
            summed = tf.reduce_sum(outputs, axis=1)
            outputs = tf.divide(summed, tf.cast(seq_len[:, None], tf.float32))
        else:
            # Flatten batch and time, then pick row b * max_seq_len +
            # (seq_len[b] - 1) for every sequence b.
            index = tf.range(0, batch_size) * max_seq_len + (seq_len - 1)
            flat = tf.reshape(outputs, [-1, self.cell_hidden[-1]])
            outputs = tf.gather(flat, index)

        hidden = tf.layers.dense(outputs, 1000)
        hidden = tf.nn.leaky_relu(hidden, 0.2)
        return tf.layers.dense(hidden, self.n_class)
Ejemplo n.º 11
0
    def build_decoder(self,
                      encoder_output,
                      encoder_state,
                      triple_input,
                      decoder_input,
                      train_mode=True):
        """Build the attention decoder for training or for inference.

        The decoder cell is a MultiRNNCell of ``self.num_layers`` cells with
        ``self.num_units`` units; the cell class is chosen by
        ``self.cell_class`` ('GRU', 'LSTM', anything else -> RNNCell).
        Everything else is created inside the 'decoder' variable scope with
        ``reuse=tf.AUTO_REUSE``.

        Args:
            encoder_output: encoder outputs the attention is prepared from.
            encoder_state: encoder state handed to the decoder function.
            triple_input: passed to ``self.transfer_matching`` when
                ``self.use_trans_select`` is set; unused otherwise.
            decoder_input: decoder inputs; only used in training mode.
            train_mode: truthy for the training graph, falsy for inference.

        Returns:
            The output logits in training mode; in inference mode, the first
            value returned by ``dynamic_rnn_decoder``.
        """
        if self.cell_class == 'GRU':
            cell_cls = GRUCell
        elif self.cell_class == 'LSTM':
            cell_cls = LSTMCell
        else:
            cell_cls = RNNCell
        decoder_cell = MultiRNNCell(
            [cell_cls(self.num_units) for _ in range(self.num_layers)])

        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE) as scope:
            kd_context = (self.transfer_matching(encoder_output, triple_input)
                          if self.use_trans_select else None)

            if not train_mode:
                att_keys, att_values, att_construct_fn = prepare_attention(
                    encoder_output, kd_context, 'bahdanau', self.num_units,
                    reuse=tf.AUTO_REUSE)
                output_fn = create_output_fn(vocab_size=self.vocab_size)
                # inference decoder
                inference_fn = attention_decoder_inference(
                    num_units=self.num_units,
                    num_decoder_symbols=self.vocab_size,
                    output_fn=output_fn,
                    encoder_state=encoder_state,
                    attention_keys=att_keys,
                    attention_values=att_values,
                    attention_construct_fn=att_construct_fn,
                    embeddings=self.word_embed,
                    start_of_sequence_id=GO_ID,
                    end_of_sequence_id=EOS_ID,
                    maximum_length=self.max_length)

                # get decoder output
                decoder_distribution, _, _ = dynamic_rnn_decoder(
                    cell=decoder_cell,
                    decoder_fn=inference_fn,
                    scope=scope)
                return decoder_distribution

            # prepare attention
            att_keys, att_values, att_construct_fn = prepare_attention(
                encoder_output, kd_context, 'bahdanau', self.num_units)
            train_fn = attention_decoder_train(
                encoder_state=encoder_state,
                attention_keys=att_keys,
                attention_values=att_values,
                attention_construct_fn=att_construct_fn)
            # train decoder
            decoder_output, _, _ = dynamic_rnn_decoder(
                cell=decoder_cell,
                decoder_fn=train_fn,
                inputs=decoder_input,
                sequence_length=self.responses_length,
                scope=scope)
            output_fn = create_output_fn(vocab_size=self.vocab_size)
            return output_fn(decoder_output)
Ejemplo n.º 12
0
    def build_encoder(self, post_word_input, corr_responses_input):
        """Encode the post and append a response-aware context vector.

        Steps:
          1. Run ``post_word_input`` through a MultiRNNCell encoder (cell
             class chosen by ``self.cell_class``) with ``self.posts_length``
             as the sequence lengths.
          2. Under the 'mutual_attention' scope, score every encoder
             position against every entry of the reshaped
             ``corr_responses_input`` with a tanh-based score and the learned
             vector ``attention_v``, softmax the scores, and build a weighted
             sum of the encoder outputs. The sums are averaged over the
             response axis.
          3. Concatenate that averaged context to ``encoder_output`` along
             axis 1, followed by a dense projection of ``self.trans_reprs``
             when ``self.use_trans_repr`` is set.

        Returns:
            Tuple ``(encoder_output, encoder_state)``.
        """
        if self.cell_class == 'GRU':
            cell_cls = GRUCell
        elif self.cell_class == 'LSTM':
            cell_cls = LSTMCell
        else:
            cell_cls = RNNCell
        encoder_cell = MultiRNNCell(
            [cell_cls(self.num_units) for _ in range(self.num_layers)])

        with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE) as scope:
            encoder_output, encoder_state = tf.nn.dynamic_rnn(
                encoder_cell,
                post_word_input,
                self.posts_length,
                dtype=tf.float32,
                scope=scope)

        batch_size = tf.shape(self.posts)[0]
        encoder_len = tf.shape(self.posts)[1]
        corr_inputs = tf.reshape(corr_responses_input,
                                 [batch_size, -1, self.dim_emb])
        corr_cum_len = tf.shape(corr_inputs)[1]

        def tile_per_response(tensor):
            # Repeat `tensor` corr_cum_len times along a new axis 1, then
            # fold that axis into the leading dimension.
            expanded = tf.expand_dims(tensor, axis=1)
            tiled = tf.tile(expanded, [1, corr_cum_len, 1, 1])
            return tf.reshape(tiled, [-1, encoder_len, self.num_units])

        with tf.variable_scope('mutual_attention', reuse=tf.AUTO_REUSE):
            encoder_proj = tf.layers.dense(encoder_output,
                                           self.num_units,
                                           name='encoder_out_transform')
            corr_proj = tf.layers.dense(corr_inputs,
                                        self.num_units,
                                        name='corr_response_transform')
            encoder_proj = tile_per_response(encoder_proj)

            corr_proj = tf.reshape(corr_proj, [-1, self.num_units])
            corr_proj = tf.expand_dims(corr_proj, axis=1)

            # TODO: try bilinear attention
            v = tf.get_variable("attention_v", [self.num_units],
                                dtype=tf.float32)
            activated = tf.tanh(encoder_proj + corr_proj)
            score = tf.reduce_sum(v * activated, axis=2)
            alignments = tf.nn.softmax(score)

            encoder_tiled = tile_per_response(encoder_output)

            weighted = tf.expand_dims(alignments, 2) * encoder_tiled
            context_mutual = tf.reduce_sum(weighted, axis=1)
            context_mutual = tf.reshape(context_mutual,
                                        [batch_size, -1, self.num_units])
            context_mutual = tf.reduce_mean(context_mutual, axis=1)

        # The averaged context is appended as one extra entry on axis 1.
        context_step = tf.expand_dims(context_mutual, 1)
        encoder_output = tf.concat([encoder_output, context_step], axis=1)

        if self.use_trans_repr:
            trans_output = tf.layers.dense(self.trans_reprs,
                                           self.num_units,
                                           name='trans_reprs_transform',
                                           reuse=tf.AUTO_REUSE)
            encoder_output = tf.concat([encoder_output, trans_output], axis=1)

        return encoder_output, encoder_state
Ejemplo n.º 13
0
# Graph inputs for a single-cell LCA network: scalar settings stored on `c`,
# the input sequence, the filter matrix and one set of state placeholders
# per cell.

# Scalar float32 placeholders attached to `c` (defined outside this excerpt;
# presumably the cell configuration object -- it is passed to LCACell below).
c.act_factor = tf.placeholder(tf.float32, shape=(), name="act_factor")
c.adapt = tf.placeholder(tf.float32, shape=(), name="adapt")
c.tau_m = tf.placeholder(tf.float32, shape=(), name="tau_m")

# Input placeholder of shape (seq_size, batch_size, input_size).
# NOTE: the name `input` shadows the Python builtin of the same name.
input = tf.placeholder(tf.float32,
                       shape=(seq_size, batch_size, input_size),
                       name="Input")
# One int32 value per batch element.
sequence_length = tf.placeholder(shape=(batch_size, ), dtype=tf.int32)

# Placeholder of shape (filter_len * input_size, layer_size), handed to the
# cell through its `Finput` argument.
Finput = tf.placeholder(tf.float32,
                        shape=(filter_len * input_size, layer_size),
                        name="Finput")

# A MultiRNNCell holding a single LCACell.
net = MultiRNNCell([
    LCACell(input_size, layer_size, filter_len, c, tf.nn.relu, Finput=Finput),
])

# For every cell in `net`: a tuple of four float32 placeholders named
# "u", "a", "a_m" and "dF". The first three are (batch_size, layer_size);
# "dF" is (batch_size, filter_len * input_size, layer_size).
# Reads the private `_cells` attribute of MultiRNNCell.
state = tuple(
    tuple((
        tf.placeholder(tf.float32, [batch_size, cell.layer_size], name="u"),
        tf.placeholder(tf.float32, [batch_size, cell.layer_size], name="a"),
        tf.placeholder(tf.float32, [batch_size, cell.layer_size], name="a_m"),
        tf.placeholder(
            tf.float32,
            [batch_size, cell.filter_len * cell.input_size, cell.layer_size],
            name="dF"),
    )) for cell in net._cells)

get_zero_state = lambda: tuple(
    np.zeros((batch_size, ) + tuple(t.get_shape().as_list()[1:]))
Ejemplo n.º 14
0
    def create_model(self):
        """
        当前Model
        rnn q & p
        p2q atten1 :<p_emb_bi|q_emb_bi>
        self atten2 : W*d_emb_bi, W*q_emb_bi
        new_d_emb_bi : softmax(atten1 + atten2) * d_emb_bi
        rnn(new_d_emb_bi)
        :return:
        """
        num_layers = self.args.num_layers
        hidden_size = self.args.hidden_size
        char_hidden_size = self.args.char_hidden_size
        char_embedding_dim = self.args.char_embedding_dim
        cell = LSTMCell if self.args.use_lstm else GRUCell

        q_input = tf.placeholder(dtype=tf.int32,
                                 shape=[None, self.q_len],
                                 name='questions_bt')
        d_input = tf.placeholder(dtype=tf.int32,
                                 shape=[None, self.d_len],
                                 name='documents_bt')
        answer_s = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None],
                                  name='answer_start')
        answer_e = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None],
                                  name='answer_end')
        q_input_char = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.q_len, self.q_char_len],
            name='questions_bt_char')
        d_input_char = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.d_len, self.d_char_len],
            name='documents_bt_char')

        init_embed = tf.constant(self.embedding_matrix, dtype=tf.float32)
        embedding_matrix = tf.get_variable(name='embdding_matrix',
                                           initializer=init_embed,
                                           dtype=tf.float32)

        q_real_len = tf.reduce_sum(tf.sign(tf.abs(q_input)), axis=1)
        d_real_len = tf.reduce_sum(tf.sign(tf.abs(d_input)), axis=1)
        # d_mask = tf.sequence_mask(dtype = tf.float32, maxlen = self.d_len, lengths = d_real_len)
        # q_mask = tf.sequence_mask(dtype = tf.float32, maxlen = self.q_len, lengths = d_real_len)
        _EPSILON = 10e-8
        self.d_real_len = d_real_len
        batch_size = tf.shape(q_input)[0]

        if self.args.use_char_embedding:
            char_embedding = tf.get_variable(name='can_embdding_matrix',
                                             initializer=tf.constant(
                                                 self.char_embedding_matrix,
                                                 dtype=tf.float32),
                                             dtype=tf.float32,
                                             trainable=True)
            q_char_embed = tf.nn.embedding_lookup(char_embedding, q_input_char)
            d_char_embed = tf.nn.embedding_lookup(char_embedding, d_input_char)
            q_char_embed = tf.nn.dropout(q_char_embed,
                                         keep_prob=self.args.keep_prob)
            d_char_embed = tf.nn.dropout(d_char_embed,
                                         keep_prob=self.args.keep_prob)
            # with tf.variable_scope('char_embedding', reuse = tf.AUTO_REUSE) as scp:
            #
            #     q_char_embed = tf.reshape(q_char_embed, [-1, self.q_len, self.d_char_len * char_embedding_dim])
            #     d_char_embed = tf.reshape(d_char_embed, [-1, self.d_len, self.q_char_len * char_embedding_dim])
            #
            #     char_rnn_f = MultiRNNCell(
            #         cells = [DropoutWrapper(cell(char_hidden_size), output_keep_prob = self.args.keep_prob)])
            #     char_rnn_b = MultiRNNCell(
            #         cells = [DropoutWrapper(cell(char_hidden_size), output_keep_prob = self.args.keep_prob)])
            #
            #     d_char_embed_out, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw = char_rnn_f, cell_bw = char_rnn_b, inputs = d_char_embed,
            #                                                              sequence_length = d_real_len, initial_state_bw = None,
            #                                                              dtype = "float32", parallel_iterations = None,
            #                                                              swap_memory = True, time_major = False, scope = 'char_rnn')
            #     q_char_embed_out, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw = char_rnn_f, cell_bw = char_rnn_b, inputs = q_char_embed,
            #                                                              sequence_length = q_real_len, initial_state_bw = None,
            #                                                              dtype = "float32", parallel_iterations = None,
            #                                                              swap_memory = True, time_major = False, scope = 'char_rnn')

            with tf.variable_scope('char_conv', reuse=tf.AUTO_REUSE) as scp:
                q_char_embed = tf.transpose(
                    q_char_embed, perm=[0, 2, 3,
                                        1])  # [batch, height, width, channels]
                filter = tf.get_variable(
                    'q_filter_w', shape=[5, 5, self.q_len, self.q_len]
                )  # [filter_height, filter_width, in_channels, out_channels]
                cnned_char = tf.nn.conv2d(
                    q_char_embed,
                    filter,
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    use_cudnn_on_gpu=True,
                    data_format="NHWC",
                    name=None
                )  # [B, (char_len-filter_size/stride), (word_len-filter_size/stride), d_len]

                q_char_embed_out = tf.nn.max_pool(cnned_char,
                                                  ksize=[1, 5, 5, 1],
                                                  strides=[1, 1, 1, 1],
                                                  padding='VALID',
                                                  data_format="NHWC",
                                                  name=None)

                char_out_size = q_char_embed_out.get_shape().as_list(
                )[1] * q_char_embed_out.get_shape().as_list()[2]
                q_char_embed_out = tf.reshape(
                    tf.transpose(q_char_embed_out, perm=[0, 3, 1, 2]),
                    shape=[batch_size, self.q_len, char_out_size])

                d_char_embed = tf.transpose(
                    d_char_embed, perm=[0, 2, 3,
                                        1])  # [batch, height, width, channels]
                filter = tf.get_variable(
                    'd_filter_w', shape=[5, 5, self.d_len, self.d_len]
                )  # [filter_height, filter_width, in_channels, out_channels]
                cnned_char = tf.nn.conv2d(
                    d_char_embed,
                    filter,
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    use_cudnn_on_gpu=True,
                    data_format="NHWC",
                    name=None
                )  # [B, (char_len-filter_size/stride), (word_len-filter_size/stride), d_len]

                d_char_embed_out = tf.nn.max_pool(cnned_char,
                                                  ksize=[1, 5, 5, 1],
                                                  strides=[1, 1, 1, 1],
                                                  padding='VALID',
                                                  data_format="NHWC",
                                                  name=None)
                char_out_size = d_char_embed_out.get_shape().as_list(
                )[1] * d_char_embed_out.get_shape().as_list()[2]
                d_char_embed_out = tf.reshape(
                    tf.transpose(d_char_embed_out, perm=[0, 3, 1, 2]),
                    shape=[batch_size, self.d_len, char_out_size])

                d_char_embed_out = tf.reshape(
                    d_char_embed_out,
                    shape=[batch_size, self.d_len, char_out_size])
                d_char_out = tf.concat(d_char_embed_out, -1)
                q_char_out = tf.concat(q_char_embed_out, -1)

        with tf.variable_scope('q_encoder') as scp:
            q_embed = tf.nn.embedding_lookup(embedding_matrix, q_input)
            if self.args.use_char_embedding:
                q_embed = tf.concat([q_embed, q_char_out], -1)

            q_rnn_f = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])
            q_rnn_b = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])

            outputs, q_last_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=q_rnn_f,
                cell_bw=q_rnn_b,
                inputs=q_embed,
                sequence_length=q_real_len,
                initial_state_bw=None,
                dtype="float32",
                parallel_iterations=None,
                swap_memory=True,
                time_major=False,
                scope=None)

            # last_states -> (output_state_fw, output_state_bw)
            # q_emb_bi = tf.concat([q_last_states[0][-1], q_last_states[1][-1]], axis = -1)
            q_emb_bi = tf.concat(outputs, axis=-1)
            if self.args.use_lstm:
                q_last_states_con = tf.concat(
                    [q_last_states[0][-1][-1], q_last_states[1][-1][-1]],
                    axis=-1)
            else:
                q_last_states_con = tf.concat(q_last_states, -1)

            logger("q_encoded_bf shape {}".format(q_emb_bi.get_shape()))

        with tf.variable_scope('d_encoder'):
            d_embed = tf.nn.embedding_lookup(embedding_matrix, d_input)
            if self.args.use_char_embedding:
                d_embed = tf.concat([d_embed, d_char_out], -1)

            d_rnn_f = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])
            d_rnn_b = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])

            d_rnn_out, last_states = tf.nn.bidirectional_dynamic_rnn(
                cell_bw=d_rnn_b,
                cell_fw=d_rnn_f,
                inputs=d_embed,
                sequence_length=d_real_len,
                swap_memory=True,
                dtype="float32",
            )
            d_emb_bi = tf.concat(d_rnn_out, axis=-1)
            self.d_emb_bi = d_emb_bi
            logger("d_encoded_bf shape {}".format(d_emb_bi.get_shape()))

        with tf.variable_scope('attention_dq'):
            atten_q2d, atten_d2q = context_query_attention(
                context=d_emb_bi,
                query=q_emb_bi,
                scope='context_query_att',
                reuse=None)
            attened_d = tf.concat([
                tf.add(d_emb_bi, atten_d2q),
                tf.add(d_emb_bi, atten_q2d), d_emb_bi
            ],
                                  axis=-1)
            # computing c dot b
            # atten_d_q = tf.einsum('bij,bjk->bik', d_emb_bi, tf.transpose(q_emb_bi, perm = [0, 2, 1]))
            # atten_d = tf.reduce_sum(atten_d_q, axis = -1)
            # attened_d_masked = atten_d / tf.expand_dims(tf.reduce_sum(atten_d, -1), -1) * d_mask
            # there should be [None, seq_len, hidden_size]
            # attened_d = tf.multiply(d_emb_bi, tf.expand_dims(attened_d_masked,
            #                                                  -1))  # self.sess.run([self.atten_d, self.attened_d, self.result_s[-1], self.result_e[-1]], data)
        self.attened_d = attened_d
        q_emb_rl = q_last_states_con
        memory = tf.concat(
            [q_last_states[0][-1][-1], q_last_states[1][-1][-1]], axis=-1)
        memory_cell = cell(hidden_size * 4)
        m_state = memory_cell.zero_state(batch_size=tf.shape(d_emb_bi)[0],
                                         dtype=tf.float32)
        candi_embed = d_embed
        result_ss = tf.zeros(
            shape=[tf.shape(d_emb_bi)[0],
                   tf.shape(d_emb_bi)[1]])
        result_ee = tf.zeros(
            shape=[tf.shape(d_emb_bi)[0],
                   tf.shape(d_emb_bi)[1]])
        activ = 'tanh'

        def inference(hidden_d, memory, m_state, result_s, result_e):
            # position = tf.stack([tf.range(0, tf.shape(d_real_len)[0], dtype = tf.int32),
            #                      tf.mod(i, d_real_len)], axis = 1)  # F**k, x.get_shape()[0] is not equal tf.shape(x)[0], f**k!!!

            # hidden_d = tf.reshape(tf.gather_nd(attened_d, position), shape = [-1, d_emb_bi.get_shape()[-1]])
            x_context, m_state = memory_cell(
                tf.concat([memory, hidden_d], axis=-1),
                state=m_state)  # just use for gru cell, x = m_state
            # tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
            # Cause by GPU memory full
            # update memory: use the question and the context to update
            with tf.variable_scope('reinforce', reuse=tf.AUTO_REUSE) as scp:
                context_and_q = tf.concat([x_context, hidden_d, q_emb_rl],
                                          axis=-1)
                rl_w = tf.get_variable(name='w',
                                       shape=[
                                           context_and_q.get_shape()[-1],
                                           context_and_q.get_shape()[-1]
                                       ])
                if activ == 'tanh':
                    rl_mul_context_q = tf.tanh(tf.matmul(context_and_q, rl_w))
                else:
                    rl_mul_context_q = tf.nn.relu(
                        tf.matmul(context_and_q, rl_w))
                out = tf.nn.tanh(
                    rl_mul_context_q
                )  # b * 1, Note: should use the bias here, while select_prob == 0 !!!!!
                memory_update_w = tf.get_variable(
                    "memory_update_w",
                    shape=[
                        context_and_q.get_shape()[-1],
                        memory.get_shape()[-1]
                    ])
                memory = tf.multiply(
                    tf.nn.tanh(tf.matmul(out, memory_update_w)), memory)
            # inference : use the new memory to inference the answer
            with tf.variable_scope('inference', reuse=tf.AUTO_REUSE) as scp:
                context = tf.nn.dropout(tf.concat([memory, hidden_d, q_emb_rl],
                                                  -1),
                                        keep_prob=self.args.keep_prob)
                infer_bilinear_start = tf.get_variable(
                    'infer_bilinear_start',
                    shape=[
                        context.get_shape()[-1],
                        candi_embed.get_shape()[-1]
                    ])
                pre_anw = tf.squeeze(tf.einsum(
                    'bij,bjk->bik', candi_embed,
                    tf.expand_dims(tf.matmul(context, infer_bilinear_start),
                                   -1)),
                                     axis=-1)
                # pre_anw = tf.reduce_sum(
                #     tf.multiply(tf.transpose(candi_embed, [1, 0, 2]), tf.nn.relu(tf.matmul(context, infer_bilinear_start))), axis = -1)
                pre_anw_pro_s = pre_anw
                infer_bilinear_end = tf.get_variable(
                    'infer_bilinear_end',
                    shape=[
                        context.get_shape()[-1],
                        candi_embed.get_shape()[-1]
                    ])
                pre_anw = tf.squeeze(tf.einsum(
                    'bij,bjk->bik', candi_embed,
                    tf.expand_dims(tf.matmul(context, infer_bilinear_end),
                                   -1)),
                                     axis=-1)
                # pre_anw = tf.reduce_sum(
                #     tf.multiply(tf.transpose(candi_embed, [1, 0, 2]), tf.nn.relu(tf.matmul(context, infer_bilinear_end))), axis = -1)
                pre_anw_pro_e = pre_anw
                # pre_anw_pro = self.softmax_with_mask(tf.transpose(pre_anw), mask = d_mask, axis = -1)

            # with tf.variable_scope('interence_end', reuse = tf.AUTO_REUSE):
            logger("pre_anw_pro_s shape {}".format(pre_anw_pro_s.get_shape()))
            return [memory, m_state, pre_anw_pro_s, pre_anw_pro_e]

        _, self.m_state, result_s, result_e = tf.scan(
            fn=lambda pre, x: inference(tf.squeeze(x, 0), *pre),
            elems=[tf.transpose(attened_d, perm=[1, 0, 2])],
            initializer=[memory, m_state, result_ss, result_ee],
            name='scan',
            swap_memory=True)
        self.result_s = result_s[-1]
        self.result_e = result_e[-1]
        self.answer_s = answer_s
        self.answer_e = answer_e

        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.result_s, labels=tf.argmax(answer_s, -1))
        losses += tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.result_e, labels=tf.argmax(answer_e, -1))
        self.loss = tf.reduce_mean(losses)
        # 如果使用log,那mask必须为1
        # self.loss = -tf.reduce_mean( tf.reduce_sum(tf.multiply(tf.log(result_prob_s), answer_s) + tf.multiply(tf.log(result_prob_e), answer_e)))
        # self.add_loss(answer_s, answer_e)
        self.correct_prediction = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.logical_and(
                    tf.equal(
                        tf.argmax(self.answer_s, 1, output_type=tf.int32),
                        tf.argmax(self.result_s, -1, output_type=tf.int32)),
                    tf.equal(
                        tf.argmax(self.answer_e, 1, output_type=tf.int32),
                        tf.argmax(self.result_e, -1, output_type=tf.int32))),
                        dtype='float')))

        self.begin_acc = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.equal(
                    tf.argmax(self.answer_s, 1, output_type=tf.int32),
                    tf.argmax(self.result_s, -1, output_type=tf.int32)),
                        dtype='float')))
        self.end_acc = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.equal(
                    tf.argmax(self.answer_e, 1, output_type=tf.int32),
                    tf.argmax(self.result_e, -1, output_type=tf.int32)),
                        dtype='float')))
Ejemplo n.º 15
0
    def __init__(self, is_training, config, input_):
        """Build the graph of a stacked Bayesian LSTM sequence model.

        The forward pass, per-sequence cost and final state are always
        built. The KL term, total loss and SGD training ops are only added
        when `is_training` is true.

        Args:
            is_training: when false, construction stops after the forward
                pass and cost (no KL term, no optimizer ops).
            config: object providing `X_dim`, `hidden_size`, `vocab_size`,
                `num_layers`, `batch_size`, `max_grad_norm`, `prior_pi`,
                `log_sigma1` and `log_sigma2`.
            input_: object providing `batch_size`, `num_steps`,
                `input_data`, `targets` and (training only) `epoch_size`.
        """
        # Plain bookkeeping copied from the arguments.
        self._is_training = is_training
        self._input = input_
        self.batch_size = input_.batch_size
        self.num_steps = input_.num_steps
        self._input_data = input_.input_data
        self._targets = input_.targets

        input_dim = config.X_dim
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        # Prior shared by every Bayesian layer and by the softmax weights.
        prior = Prior(config.prior_pi, config.log_sigma1, config.log_sigma2)

        # The inputs are sliced as [:, time_step, :] below, i.e. they are
        # consumed directly, without an embedding lookup.
        step_inputs = input_.input_data

        # Layer 0 consumes the raw feature width; deeper layers consume the
        # hidden output of the layer underneath.
        layer_cells = [
            BayesianLSTMCell(input_dim if layer == 0 else hidden_size,
                             hidden_size,
                             prior,
                             is_training,
                             forget_bias=0.0,
                             name="bbb_lstm_{}".format(layer))
            for layer in range(config.num_layers)
        ]
        stacked_cell = MultiRNNCell(layer_cells, state_is_tuple=True)

        self._initial_state = stacked_cell.zero_state(config.batch_size,
                                                      data_type())
        state = self._initial_state

        # Manually unrolled forward pass over the truncated mini-batch;
        # variables are created at step 0 and reused afterwards.
        step_outputs = []
        with tf.variable_scope("RNN"):
            for step in range(self.num_steps):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, state = stacked_cell(step_inputs[:, step, :],
                                                  state)
                step_outputs.append(step_output)
        # One row of width hidden_size per (sequence, step) pair.
        flat_output = tf.reshape(tf.concat(step_outputs, 1),
                                 [-1, hidden_size])

        # Softmax projection; weights and bias come from sample_posterior.
        softmax_w = sample_posterior((hidden_size, vocab_size), "softmax_w",
                                     prior, is_training)
        softmax_b = sample_posterior((vocab_size, 1), "softmax_b", prior,
                                     is_training)
        flat_bias = tf.squeeze(softmax_b)

        flat_logits = tf.nn.xw_plus_b(flat_output, softmax_w, flat_bias)
        logits = tf.reshape(flat_logits,
                            [self.batch_size, self.num_steps, vocab_size])
        self._output = tf.nn.softmax(logits)

        # Un-averaged loss with uniform weights; it is reduced just below.
        uniform_weights = tf.ones([self.batch_size, self.num_steps],
                                  dtype=data_type())
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            uniform_weights,
            average_across_timesteps=False,
            average_across_batch=False)

        # Summed over everything, then divided by the batch size.
        self._cost = tf.reduce_sum(loss) / self.batch_size
        self._kl_loss = 0.
        self._final_state = state

        # Evaluation graphs need neither the KL term nor an optimizer.
        if not is_training:
            return

        # KL divergence collected from the "KL_layers" collection, scaled by
        # 1 / (number of batches per epoch * sequences per batch).
        kl_loss = tf.add_n(tf.get_collection("KL_layers"), "kl_divergence")
        kl_factor = 1.0 / (self._input.epoch_size * self.batch_size)
        self._kl_loss = kl_factor * kl_loss

        self._total_loss = self._cost + self._kl_loss

        # Plain SGD on globally norm-clipped gradients; the learning rate
        # lives in a non-trainable variable.
        self._lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self._total_loss, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(clipped_grads, trainable),
            global_step=tf.contrib.framework.get_or_create_global_step())

        # Placeholder/assign pair used to change the learning rate.
        self._new_lr = tf.placeholder(data_type(),
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
Ejemplo n.º 16
0
def bidirectional_GRU(inputs,
                      inputs_len,
                      cell=None,
                      cell_fn=tf.contrib.rnn.GRUCell,
                      units=Params.attn_size,
                      layers=1,
                      scope="Bidirectional_GRU",
                      output=0,
                      is_training=True,
                      reuse=None):
    '''
    Bidirectional recurrent neural network with GRU cells.

    Args:
        inputs:      rnn input of shape (batch_size, timestep, dim). An
                     input with more than 3 dimensions is reshaped to
                     (shape[0] * shape[1], shape[2], -1) first.
        inputs_len:  rnn input_len of shape (batch_size, ). It is reshaped
                     alongside `inputs` but is NOT passed to the RNN: the
                     `sequence_length` argument is commented out below.
        cell:        optional (cell_fw, cell_bw) pair of RNN cells. When
                     omitted, cells are built with `cell_fn`.
        cell_fn:     cell constructor, called as `cell_fn(units)`.
        units:       number of units per direction.
        layers:      number of stacked layers per direction (only used when
                     `cell` is None).
        scope:       variable scope name.
        output:      if 0, output returns rnn output for every timestep,
                     if 1, output returns concatenated state of backward and
                     forward rnn.
        is_training: forwarded to `apply_dropout`.
        reuse:       forwarded to `tf.variable_scope`.

    Returns:
        For `output == 0`, the forward/backward outputs concatenated on
        axis 2. For `output == 1`, the concatenated final states reshaped
        to (Params.batch_size, shapes[1], 2 * units), where `shapes` is the
        static shape of the original `inputs`. Any other `output` value
        yields None.
    '''
    with tf.variable_scope(scope, reuse=reuse):
        # Resolve the static shape (and fold rank>3 inputs) before choosing
        # the cells: the `output == 1` branch needs `shapes` even when the
        # caller supplies `cell`, which previously raised NameError.
        shapes = inputs.get_shape().as_list()
        if len(shapes) > 3:
            inputs = tf.reshape(inputs,
                                (shapes[0] * shapes[1], shapes[2], -1))
            inputs_len = tf.reshape(inputs_len, (shapes[0] * shapes[1], ))

        if cell is not None:
            (cell_fw, cell_bw) = cell
        elif layers > 1:
            # if no cells are provided, use standard GRU cell implementation
            cell_fw = MultiRNNCell([
                apply_dropout(cell_fn(units),
                              size=inputs.shape[-1] if i == 0 else units,
                              is_training=is_training)
                for i in range(layers)
            ])
            cell_bw = MultiRNNCell([
                apply_dropout(cell_fn(units),
                              size=inputs.shape[-1] if i == 0 else units,
                              is_training=is_training)
                for i in range(layers)
            ])
        else:
            cell_fw, cell_bw = [
                apply_dropout(cell_fn(units),
                              size=inputs.shape[-1],
                              is_training=is_training) for _ in range(2)
            ]

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            inputs,
            # sequence_length = inputs_len,
            dtype=tf.float32)
        if output == 0:
            return tf.concat(outputs, 2)
        elif output == 1:
            return tf.reshape(tf.concat(states, 1),
                              (Params.batch_size, shapes[1], 2 * units))
        # Unknown `output` mode: keep the historical behaviour of
        # returning None rather than raising.
        return None
Ejemplo n.º 17
0
    def lstm_rnn_subgraph(self):
        """ Defines the forward pass through the decoder LSTM-RNN.

        Builds the decoder cell (stacked when `opt.dec_num_layers > 1`),
        unrolls it with `tf.nn.raw_rnn` driven by a custom loop function
        (scheduled sampling, optional attention / input feeding), and
        projects the collected outputs into vocabulary space.

        Returns:
            Tuple `(final_state, flat_rnn_outputs, projected_rnn_outputs,
            logits)`: the final decoder state, the decoder outputs flattened
            to rows of width `opt.dec_hidden_dims`, their projection through
            `projection_weights` / `projection_biases`, and the logits
            obtained against the transposed encoder embedding table.
        """
        with tf.variable_scope('lstm_rnn', reuse=None), tf.device('/gpu:0'):
            # Same functionality as within the encoder
            def _lstm_cell(model_opt):
                """ Defines a basic LSTM cell to which various wrappers can be applied. """
                base_cell = BasicLSTMCell(model_opt.dec_hidden_dims,
                                          forget_bias=2.5,
                                          state_is_tuple=True)
                if model_opt.allow_dropout:
                    base_cell = DropoutWrapper(
                        base_cell, output_keep_prob=self.rnn_keep_prob)
                return base_cell

            if self.opt.dec_num_layers > 1:
                cell = MultiRNNCell([
                    _lstm_cell(self.opt)
                    for _ in range(self.opt.dec_num_layers)
                ])
            else:
                cell = _lstm_cell(self.opt)

            # Obtain sequences decoded from the encoder's sentence representations
            # <PAD> slice output by the decoder after each generated batch sequence has ended in <EOS>
            pad_step_idx = tf.fill([self.batch_length], self.vocab.pad_id)
            pad_step_embeds = tf.nn.embedding_lookup(self.embedding_table,
                                                     pad_step_idx,
                                                     name='pad_step_embeds')

            # raw_rnn expects input to be served in form of a TensorArray
            inputs_ta = tf.TensorArray(size=self.batch_steps, dtype=self.float_type) \
                .unstack(tf.transpose(self.input_data, perm=[1, 0, 2]), name='rnn_input_array')

            # Initial decoder state set equal to the final state of the encoder
            initial_state = self.encoder.decoder_state

            # Initialize tensor for tracking sentence completion
            if self.eos_tracker is None:
                self.eos_tracker = tf.zeros([self.batch_length],
                                            dtype=self.int_type)

            # Define the raw_rnn loop which allows for greater control over the generated output, as compared
            # to dynamic_rnn()
            def loop_fn(time, cell_output, cell_state, loop_state):
                """ Defines the loop function utilized by the raw_rnn.

                Returns the tuple raw_rnn expects: `(elements_finished,
                next_input, next_cell_state, emit_output, next_loop_state)`.
                `cell_output` is None on the initialization call only.
                """

                # Helper function for obtaining the output embeddings
                def _get_predictions():
                    """ Projects the likeliest raw_rnn output predictions into the embedding space. """
                    # Flatten RNN output to two dimensions
                    flat_step_outputs = tf.reshape(
                        cell_output, [-1, self.opt.dec_hidden_dims])
                    projected_step_outputs = tf.nn.xw_plus_b(
                        flat_step_outputs, self.projection_weights,
                        self.projection_biases)
                    step_logits = tf.nn.xw_plus_b(projected_step_outputs,
                                                  tf.transpose(
                                                      self.embedding_table),
                                                  self.output_embedding_biases,
                                                  name='logits')
                    # Isolate highest-probability predictions
                    predicted_scores = tf.nn.softmax(step_logits, -1)
                    idx_predictions = tf.cast(tf.argmax(predicted_scores,
                                                        axis=-1),
                                              dtype=self.int_type)
                    # Embed predicted word indices
                    embedded_predictions = tf.nn.embedding_lookup(
                        self.embedding_table, idx_predictions)
                    return idx_predictions, embedded_predictions

                def _attend():
                    """ Executes the decoding-with-attention mechanism utilizing global 'Luong' attention. """
                    # Project encoder hidden states, 'memories', to match the dimensionality of the decoder,
                    # i.e. target, hidden states
                    memory_values = self.encoder.rnn_outputs
                    flat_values = tf.reshape(
                        memory_values, [-1, tf.shape(memory_values)[-1]])
                    flat_keys = tf.matmul(flat_values, self.memory_key_weights)
                    memory_keys = tf.reshape(flat_keys, [
                        self.encoder.batch_length, self.encoder.batch_steps,
                        self.opt.dec_hidden_dims
                    ])

                    # Apply length to the memory keys so as to restrict attention to non-padded positions
                    score_mask = tf.sequence_mask(
                        self.encoder.length_mask,
                        maxlen=tf.reduce_max(self.encoder.length_mask),
                        dtype=self.float_type)
                    score_mask = tf.expand_dims(score_mask, -1)
                    score_mask = tf.matmul(score_mask,
                                           tf.ones([
                                               self.encoder.batch_length,
                                               self.opt.dec_hidden_dims, 1
                                           ]),
                                           transpose_b=True)
                    memory_keys = tf.where(tf.cast(score_mask, dtype=tf.bool),
                                           memory_keys,
                                           tf.zeros_like(memory_keys))

                    # Obtain target query, i.e. the current decoder hidden state.
                    # A MultiRNNCell state is a tuple of per-layer (c, h) tuples, whereas a single
                    # cell's state is one (c, h) tuple; indexing the latter twice would slice the
                    # last batch row out of `h` instead of selecting `h` itself.
                    if self.opt.dec_num_layers > 1:
                        target_hidden_state = cell_state[-1][-1]
                    else:
                        target_hidden_state = cell_state[-1]
                    target_query = tf.expand_dims(target_hidden_state, 1)

                    # Compute alignments globally, by attending to all encoder states at once
                    score = tf.matmul(target_query,
                                      memory_keys,
                                      transpose_b=True)
                    score = tf.squeeze(score, [1])
                    alignments = tf.nn.softmax(score)

                    # Compute the context vector by applying calculated alignments to encoder states
                    expanded_alignments = tf.expand_dims(alignments, 1)
                    context = tf.matmul(expanded_alignments, memory_values)
                    context = tf.squeeze(context, [1])

                    # Compute the attentional vector by combining encoder context with decoder query
                    attention = tf.tanh(
                        tf.matmul(
                            tf.concat([context, target_hidden_state], -1),
                            self.attention_weights))
                    return attention

                # Initialize the loop function
                emit_output = cell_output  # no output is emitted during initialization
                next_loop_state = None
                # Check if to terminate the loop;
                # length slack denotes how much longer the output sequence is allowed to be than the input
                elements_finished = tf.greater_equal(
                    time, self.length_mask + self.opt.length_slack)
                # Once stopping conditions are met for all batch elements, terminate loop
                finished = tf.reduce_all(elements_finished)

                if cell_output is None:  # i.e. during initialization only
                    # Set initial values
                    self.eos_tracker *= 0
                    next_cell_state = initial_state
                    next_input = inputs_ta.read(0)

                # At time-step 1+
                else:
                    # Pass on the cell state
                    next_cell_state = cell_state
                    # Get predictions from previous time-step
                    predicted_idx, predicted_embeds = _get_predictions()
                    # Check if stopping conditions are met
                    # 1. Check if all decoded batch items contain an <EOS> prediction
                    # NOTE(review): eos_tracker is rebound as a Python attribute rather than threaded
                    # through raw_rnn's loop_state, so it presumably does not accumulate across
                    # loop iterations — confirm.
                    self.eos_tracker += tf.cast(
                        tf.equal(predicted_idx, self.vocab.eos_id),
                        self.int_type)
                    # 2. Check if all decoded batch items are equal in length to corresponding encoder inputs
                    boundary_reached = tf.greater_equal(time, self.length_mask)
                    if not self.opt.is_train or not self.opt.use_reconstruction_objective:
                        # Extended stopping criterion during inference,
                        # as output length is allowed to exceed input length via the slack_length parameter.
                        # The tracker was already updated above; it is only tested for `> 0`, so it is
                        # not incremented a second time here.
                        elements_finished = tf.logical_or(
                            tf.greater(self.eos_tracker, 0),
                            tf.greater_equal(
                                time,
                                (self.length_mask + self.opt.length_slack)))
                        finished = tf.reduce_all(elements_finished)

                    # Scheduled sampling: If flip value is smaller than sampling probability, the output of the
                    # decoder at the current time-step is fed as input to the decoder at the subsequent time-step
                    flip = tf.random_uniform(shape=[], minval=0.0, maxval=1.0)
                    input_tensor = tf.cond(
                        tf.logical_or(tf.less(self.sampling_bias, flip),
                                      tf.reduce_all(boundary_reached)),
                        lambda: predicted_embeds, lambda: inputs_ta.read(time))
                    # If stopping conditions have been met, output a <PAD> slice, then terminate loop
                    next_input = tf.cond(finished, lambda: pad_step_embeds,
                                         lambda: input_tensor)

                    if self.opt.attentive_decoding:
                        # Input feeding: Combine attentive information with the input to the decoder at the
                        # subsequent time-step (either target tokens or predictions from the current time-step)
                        attentional_hidden_state = _attend()
                        next_input = tf.matmul(
                            tf.concat([next_input, attentional_hidden_state],
                                      -1), self.dec_mixture_weights)

                if self.opt.attentive_encoding:
                    # Unused
                    next_input = tf.matmul(
                        tf.concat(
                            [next_input, self.encoder.sentence_encodings], -1),
                        self.enc_mixture_weights)

                return elements_finished, next_input, next_cell_state, emit_output, next_loop_state

            # Get RNN outputs
            rnn_outputs_tensor_array, final_state, _ = tf.nn.raw_rnn(
                cell, loop_fn)
            rnn_outputs = rnn_outputs_tensor_array.stack()
            # stack() yields time-major outputs; switch to batch-major
            rnn_outputs = tf.transpose(rnn_outputs, perm=[1, 0, 2])
            # The decoder cell emits vectors of width dec_hidden_dims (see _lstm_cell), so the
            # flattening must use the decoder width, matching _get_predictions above.
            flat_rnn_outputs = tf.reshape(rnn_outputs,
                                          [-1, self.opt.dec_hidden_dims],
                                          name='reshaped_rnn_outputs')
            # Project RNN outputs into the embedding space, followed by the projection into vocabulary space
            projected_rnn_outputs = tf.nn.xw_plus_b(flat_rnn_outputs,
                                                    self.projection_weights,
                                                    self.projection_biases)
            logits = tf.nn.xw_plus_b(projected_rnn_outputs,
                                     tf.transpose(
                                         self.encoder.embedding_table),
                                     self.output_embedding_biases,
                                     name='logits')

        return final_state, flat_rnn_outputs, projected_rnn_outputs, logits
Ejemplo n.º 18
0
    def create_model(self):
        #########################
        # b ... position of the example within the batch
        # t ... position of the word within the document/question
        # f ... features of the embedding vector or the encoded feature vector
        # i ... position of the word in candidates list
        #########################
        num_layers = self.args.num_layers
        hidden_size = self.args.hidden_size
        cell = LSTMCell if self.args.use_lstm else GRUCell

        # model input
        questions_bt = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.q_len),
                                      name="questions_bt")
        documents_bt = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.d_len),
                                      name="documents_bt")
        candidates_bi = tf.placeholder(dtype=tf.int32,
                                       shape=(None, self.dataset.A_len),
                                       name="candidates_bi")
        y_true_bi = tf.placeholder(shape=(None, self.dataset.A_len),
                                   dtype=tf.float32,
                                   name="y_true_bi")

        # shape=(None) the length of inputs
        context_lengths = tf.reduce_sum(tf.sign(tf.abs(documents_bt)), 1)
        question_lengths = tf.reduce_sum(tf.sign(tf.abs(questions_bt)), 1)
        context_mask_bt = tf.sequence_mask(context_lengths,
                                           self.d_len,
                                           dtype=tf.float32)

        init_embedding = tf.constant(self.embedding_matrix,
                                     dtype=tf.float32,
                                     name="embedding_init")
        embedding = tf.get_variable(initializer=init_embedding,
                                    name="embedding_matrix",
                                    dtype=tf.float32)

        with tf.variable_scope('q_encoder',
                               initializer=tf.orthogonal_initializer()):
            # encode question to fixed length of vector
            # output shape: (None, max_q_length, embedding_dim)
            question_embed_btf = tf.nn.embedding_lookup(
                embedding, questions_bt)
            logger("q_embed_btf shape {}".format(
                question_embed_btf.get_shape()))
            q_cell_fw = MultiRNNCell(
                cells=[cell(hidden_size) for _ in range(num_layers)])
            q_cell_bw = MultiRNNCell(
                cells=[cell(hidden_size) for _ in range(num_layers)])
            outputs, last_states = tf.nn.bidirectional_dynamic_rnn(
                cell_bw=q_cell_bw,
                cell_fw=q_cell_fw,
                dtype="float32",
                sequence_length=question_lengths,
                inputs=question_embed_btf,
                swap_memory=True)
            # q_encoder output shape: (None, hidden_size * 2)
            q_encoded_bf = tf.concat([last_states[0][-1], last_states[1][-1]],
                                     axis=-1)
            logger("q_encoded_bf shape {}".format(q_encoded_bf.get_shape()))

        with tf.variable_scope('d_encoder',
                               initializer=tf.orthogonal_initializer()):
            # encode each document(context) word to fixed length vector
            # output shape: (None, max_d_length, embedding_dim)
            d_embed_btf = tf.nn.embedding_lookup(embedding, documents_bt)
            logger("d_embed_btf shape {}".format(d_embed_btf.get_shape()))
            d_cell_fw = MultiRNNCell(
                cells=[cell(hidden_size) for _ in range(num_layers)])
            d_cell_bw = MultiRNNCell(
                cells=[cell(hidden_size) for _ in range(num_layers)])
            outputs, last_states = tf.nn.bidirectional_dynamic_rnn(
                cell_bw=d_cell_bw,
                cell_fw=d_cell_fw,
                dtype="float32",
                sequence_length=context_lengths,
                inputs=d_embed_btf,
                swap_memory=True)
            # d_encoder output shape: (None, max_d_length, hidden_size * 2)
            d_encoded_btf = tf.concat(outputs, axis=-1)
            logger("d_encoded_btf shape {}".format(d_encoded_btf.get_shape()))

        def att_dot(x):  # attention
            """Dot-product attention scores between a question vector and each document position.

            ``x`` is a two-element sequence ``(d_btf, q_bf)``. Going by the
            shape comments of the encoders that produce them, ``d_btf`` is
            presumably (batch, max_d_length, hidden_size * 2) and ``q_bf`` is
            (batch, hidden_size * 2) -- TODO confirm against the caller.

            Returns the scores reshaped to ``[-1, self.d_len]``.
            """
            d_btf, q_bf = x
            # Add a trailing axis so the question vector can take part in a
            # batched matmul; both operands are then adjointed by matmul itself.
            q_column = tf.expand_dims(q_bf, -1)
            scores = tf.matmul(q_column,
                               d_btf,
                               adjoint_a=True,
                               adjoint_b=True)
            return tf.reshape(scores, [-1, self.d_len])

        with tf.variable_scope('merge'):
            mem_attention_pre_soft_bt = att_dot([d_encoded_btf, q_encoded_bf])
            mem_attention_pre_soft_masked_bt = tf.multiply(
                mem_attention_pre_soft_bt,
                context_mask_bt,
                name="attention_mask")
            mem_attention_bt = tf.nn.softmax(
                logits=mem_attention_pre_soft_masked_bt,
                name="softmax_attention")

        # attention-sum process
        def sum_prob_of_word(word_ix, sentence_ixs, sentence_attention_probs):
            """Sum the attention probabilities at every position where ``word_ix`` occurs.

            Positions are those where ``sentence_ixs`` equals ``word_ix``; the
            matching entries of ``sentence_attention_probs`` are gathered and
            reduced to a single scalar.
            """
            is_match = tf.equal(sentence_ixs, word_ix)
            match_positions = tf.where(is_match)
            matched_probs = tf.gather(sentence_attention_probs,
                                      match_positions)
            return tf.reduce_sum(matched_probs)

        # noinspection PyUnusedLocal
        def sum_probs_single_sentence(prev, cur):
            """Scan step: accumulate attention mass per candidate for one example.

            ``prev`` (the scan accumulator) is ignored. ``cur`` unpacks into the
            candidate indices, the document token indices and the attention
            probabilities of a single example. For each candidate,
            ``sum_prob_of_word`` is applied via an inner ``tf.scan``.
            """
            candidate_indices_i, sentence_ixs_t, sentence_attention_probs_t = cur

            def candidate_mass(_previous, candidate):
                # The running value is unused; each candidate is scored
                # independently against the whole document.
                return sum_prob_of_word(candidate, sentence_ixs_t,
                                        sentence_attention_probs_t)

            return tf.scan(fn=candidate_mass,
                           elems=[candidate_indices_i],
                           initializer=tf.constant(0., dtype="float32"))

        def sum_probs_batch(candidate_indices_bi, sentence_ixs_bt,
                            sentence_attention_probs_bt):
            """Apply ``sum_probs_single_sentence`` to every example of the batch.

            Scans over the leading axis of the three inputs in lockstep and
            returns the stacked per-example results.
            """
            # NOTE(review): the scan initializer is a tf.Variable of length
            # self.dataset.A_len; this adds a variable to the graph that must be
            # initialized. A constant tensor would presumably suffice -- confirm
            # before changing, since it alters the set of graph variables.
            scan_init = tf.Variable([0] * self.dataset.A_len, dtype="float32")
            per_example_inputs = [
                candidate_indices_bi, sentence_ixs_bt,
                sentence_attention_probs_bt
            ]
            return tf.scan(fn=sum_probs_single_sentence,
                           elems=per_example_inputs,
                           initializer=scan_init)

        # output shape: (None, i) i = max_candidate_length = 10
        y_hat = sum_probs_batch(candidates_bi, documents_bt, mem_attention_bt)

        # crossentropy
        output = y_hat / tf.reduce_sum(y_hat, axis=-1, keep_dims=True)
        # manual computation of crossentropy
        epsilon = tf.convert_to_tensor(_EPSILON,
                                       output.dtype.base_dtype,
                                       name="epsilon")
        output = tf.clip_by_value(output, epsilon, 1. - epsilon)
        self.loss = tf.reduce_mean(
            -tf.reduce_sum(y_true_bi * tf.log(output), axis=-1))

        # correct prediction nums
        self.correct_prediction = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.equal(tf.argmax(y_hat, 1), tf.argmax(y_true_bi, 1)),
                        "float")))
Ejemplo n.º 19
0
    def __init__(self,
                 inp,
                 inp_mask,
                 decode_time_steps,
                 hyper_params=None,
                 name='Tacotron'):
        """
        Build the computational graph.

        The graph consists of an encoder (embedding, pre-net, CBHG), an
        attention module, an attention-RNN / decoder-RNN loop run inside
        ``tf.while_loop``, and a CBHG post-net. Results are exposed as
        ``self.seq2seq_output``, ``self.alpha_output`` and ``self.post_output``.

        :param inp: input tensor fed to the embedding layer; its first two
            dimensions are read as (batch_size, input_time_steps).
            Presumably integer character ids -- TODO confirm.
        :param inp_mask: passed as ``sequence_length`` to the encoder CBHG and
            the attention module (so, despite the name, it presumably holds
            per-example lengths -- TODO confirm).
        :param decode_time_steps: number of decoder loop iterations; each
            iteration emits ``reduction_rate`` output frames.
        :param hyper_params: hyper-parameter object; a default ``HyperParams()``
            is created when ``None``.
        :param name: name passed to the parent constructor and used as the
            enclosing variable scope.
        """
        super(Tacotron, self).__init__(name)
        self.hyper_params = HyperParams(
        ) if hyper_params is None else hyper_params

        with tf.variable_scope(name):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            # Dynamic sizes read from the input tensor.
            batch_size = tf.shape(inp)[0]
            input_time_steps = tf.shape(inp)[1]
            reduc = self.hyper_params.reduction_rate
            # Total number of output frames after un-grouping the decoder steps.
            output_time_steps = decode_time_steps * reduc

            ### Encoder [begin]
            with tf.variable_scope('character_embedding'):
                embed_inp = EmbeddingLayer(self.hyper_params.embed_class,
                                           self.hyper_params.embed_dim)(inp)
            with tf.variable_scope('encoder_pre_net'):
                # Two dense+ReLU layers (256 -> 128); dropout is built with
                # training=False, i.e. disabled in this graph.
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    embed_inp, 256, tf.nn.relu),
                                               training=False)
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    pre_ed_inp, 128, tf.nn.relu),
                                               training=False)
            encoder_output = modules.cbhg(pre_ed_inp,
                                          training=False,
                                          k=16,
                                          bank_filters=128,
                                          projection_filters=(128, 128),
                                          highway_layers=4,
                                          highway_units=128,
                                          bi_gru_units=128,
                                          sequence_length=inp_mask,
                                          name='encoder_cbhg',
                                          reuse=False)
            ### Encoder [end]

            ### Attention Module
            with tf.variable_scope('attention'):
                att_module = AttentionModule(256,
                                             encoder_output,
                                             sequence_length=inp_mask,
                                             time_major=False)

            ### Decoder [begin]
            att_cell = ZoneoutWrapper(sGRUCell(256), 0.1, False)
            dec_cell = MultiRNNCell(
                [ResidualWrapper(GRUCell(256)) for _ in range(2)])
            # Prepare the initial loop variables for the decoding while-loop.
            with tf.variable_scope('prepare_decode'):
                # Number of loop iterations; since output_time_steps is
                # decode_time_steps * reduc, this equals decode_time_steps.
                reduced_time_steps = tf.div(output_time_steps, reduc)
                init_att_cell_state = att_cell.zero_state(
                    batch_size, tf.float32)
                init_dec_cell_state = dec_cell.zero_state(
                    batch_size, tf.float32)
                init_state_tup = tuple(
                    [init_att_cell_state, init_dec_cell_state])
                # TensorArrays collecting per-step decoder outputs and
                # attention weights (alpha).
                init_output_ta = tf.TensorArray(size=reduced_time_steps,
                                                dtype=tf.float32)
                init_alpha_ta = tf.TensorArray(size=reduced_time_steps,
                                               dtype=tf.float32)
                # All-zero "go" frame used as the first decoder input.
                go_array = tf.zeros(
                    [batch_size, self.hyper_params.seq2seq_dim],
                    dtype=tf.float32)
                init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
                init_time = tf.constant(0, dtype=tf.int32)
            # Loop while the step counter is below the number of decoder steps.
            cond = lambda x, *_: tf.less(x, reduced_time_steps)

            def body(this_time, old_output_ta, old_alpha_ta, old_state_tup,
                     last_context, last_output):
                """One decoder step: pre-net -> attention RNN -> attention -> decoder RNN."""
                with tf.variable_scope('decoder_pre_net'):
                    dec_pre_ed_inp = last_output
                    # Unlike the encoder pre-net, dropout here is built with
                    # training=True, i.e. always active in this graph.
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 256, tf.nn.relu),
                                                       training=True)
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 128, tf.nn.relu),
                                                       training=True)
                with tf.variable_scope('attention_rnn'):
                    # The attention RNN sees the previous context vector
                    # concatenated with the pre-net output.
                    att_cell_inp = tf.concat([last_context, dec_pre_ed_inp],
                                             axis=-1)
                    att_cell_out, att_cell_state = att_cell(
                        att_cell_inp, old_state_tup[0])
                with tf.variable_scope('attention'):
                    # The new attention-RNN state is the attention query.
                    query = att_cell_state
                    context, alpha = att_module(query)
                    new_alpha_ta = old_alpha_ta.write(this_time, alpha)
                with tf.variable_scope('decoder_rnn'):
                    dec_input = tf.layers.dense(
                        tf.concat([att_cell_out, context], axis=-1), 256)
                    dec_cell_out, dec_cell_state = dec_cell(
                        dec_input, old_state_tup[1])
                    # Project to `reduc` frames of seq2seq_dim features each.
                    dense_out = tf.layers.dense(
                        dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                    new_output_ta = old_output_ta.write(this_time, dense_out)
                    # Only the last frame of the group is fed back as the
                    # next step's input.
                    new_output = dense_out[:, -self.hyper_params.seq2seq_dim:]
                new_state_tup = tuple([att_cell_state, dec_cell_state])
                return tf.add(
                    this_time, 1
                ), new_output_ta, new_alpha_ta, new_state_tup, context, new_output

            # run loop
            _, seq2seq_output_ta, alpha_ta, *_ = tf.while_loop(
                cond, body, [
                    init_time, init_output_ta, init_alpha_ta, init_state_tup,
                    init_context, go_array
                ])
            with tf.variable_scope('reshape_decode'):
                # Stacked outputs are time-major: (steps, batch, dim * reduc).
                seq2seq_output = tf.reshape(
                    seq2seq_output_ta.stack(),
                    shape=(reduced_time_steps, batch_size,
                           self.hyper_params.seq2seq_dim * reduc))
                # Switch to batch-major and un-group the `reduc` frames per
                # step into (batch, output_time_steps, dim).
                seq2seq_output = tf.reshape(
                    tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                    shape=(batch_size, output_time_steps,
                           self.hyper_params.seq2seq_dim))
                self.seq2seq_output = seq2seq_output

                alpha_output = tf.reshape(alpha_ta.stack(),
                                          shape=(reduced_time_steps,
                                                 batch_size, input_time_steps))
                # Batch-major alignments with a trailing singleton axis added.
                alpha_output = tf.expand_dims(
                    tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
                self.alpha_output = alpha_output
            ### Decoder [end]

            ### PostNet [begin]
            post_output = modules.cbhg(
                seq2seq_output,
                training=False,
                k=8,
                bank_filters=128,
                projection_filters=(256, self.hyper_params.seq2seq_dim),
                highway_layers=4,
                highway_units=128,
                bi_gru_units=128,
                sequence_length=None,
                name='decoder_cbhg',
                reuse=False)
            # Final linear projection to the post-net output dimension.
            post_output = tf.layers.dense(post_output,
                                          self.hyper_params.post_dim,
                                          name='post_linear_transform')
            self.post_output = post_output
Ejemplo n.º 20
0
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """
        Build the decoder cell.

        Stacks ``self.depth`` single cells, wraps them in an
        ``AttentionWrapper`` attending over the encoder outputs, and seeds the
        wrapper's zero state with the encoder state. The chosen attention
        mechanism is also stored on ``self.attention_mechanism``.

        :param encoder_outputs: encoder outputs, used as the attention memory
        :param encoder_state: encoder final state, used as the initial cell state
        :return: tuple ``(cell, decoder_initial_state)``
        """
        memory = encoder_outputs
        initial_cell_state = encoder_state
        memory_length = self.encoder_inputs_length
        batch_size = self.batch_size

        # For a bidirectional encoder keep only the last `depth` state entries.
        if self.bidirection:
            initial_cell_state = initial_cell_state[-self.depth:]

        if self.time_major:
            memory = tf.transpose(memory, (1, 0, 2))

        if self.use_beamsearch_decode:
            # Replicate every batch entry once per beam.
            beam_width = self.beam_width
            memory = seq2seq.tile_batch(memory, multiplier=beam_width)
            initial_cell_state = seq2seq.tile_batch(
                initial_cell_state, multiplier=beam_width)
            memory_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=beam_width)
            batch_size *= beam_width

        # 'luong' (case-insensitive) selects Luong attention; anything else
        # falls back to Bahdanau.
        if self.attention_type.lower() == 'luong':
            mechanism_cls = LuongAttention
        else:
            mechanism_cls = BahdanauAttention
        self.attention_mechanism = mechanism_cls(
            num_units=self.hidden_size,
            memory=memory,
            memory_sequence_length=memory_length
        )

        stacked_cells = []
        for _ in range(self.depth):
            stacked_cells.append(
                self.build_single_cell(
                    self.hidden_size,
                    use_residual=self.use_residual))
        multi_cell = MultiRNNCell(stacked_cells)

        # Alignment history is recorded only outside training and only when
        # beam search is not used.
        alignment_history = not (
            self.mode == 'train' or self.use_beamsearch_decode
        )

        def cell_input_fn(inputs, attention):
            """Combine the step input with the attention vector.

            With residual connections the concatenation is projected back to
            ``hidden_size``; otherwise it is passed through unchanged.
            """
            merged = array_ops.concat([inputs, attention], -1)
            if not self.use_residual:
                return merged

            attn_projection = layers.Dense(self.hidden_size,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(merged)

        attention_cell = AttentionWrapper(
            cell=multi_cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_size,
            alignment_history=alignment_history,
            cell_input_fn=cell_input_fn,
            name='Attention_Wrapper'
        )

        # Start from the wrapper's zero state, then hand over the encoder
        # state as the wrapped cell's state.
        zero_state = attention_cell.zero_state(batch_size, tf.float32)
        decoder_initial_state = zero_state.clone(
            cell_state=initial_cell_state
        )

        return attention_cell, decoder_initial_state
Ejemplo n.º 21
0
	def _build_network(self, dropout):
		"""Build the stacked-LSTM graph: embedding -> LSTM stack -> softmax -> loss.

		Creates the input/target placeholders and stores the results on the
		instance: ``_input_ids``, ``_target_ids``, ``_stack``, ``_state``
		(first the zero state, then rebound to the final RNN state),
		``_probs`` and ``_loss``.

		Args:
			dropout: output dropout rate for every LSTM layer; a value
				``<= 0`` disables the dropout wrapper.
		"""
		# Legend for tensor shapes below:
		# 	B := batch size
		# 	C := number of classes
		# 	H := number of hidden units (aka layer size)
		# 	S := sequence length

		# keep a reference to _config to make code below simpler
		config = self._config

		# Create size BxS input and target placeholder tensors
		# These will be filled in with actual values at session runtime
		data_dims = [self._batch_size, self._seq_len]
		self._input_ids = tf.placeholder(tf.int32, data_dims)
		self._target_ids = tf.placeholder(tf.int64, data_dims)

		# Create an embedding tensor to represent integer inputs into H dimensions
		# This must be done on the CPU, according to:
		# https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py#L143
		# (Ops and variables pinned to the CPU because of missing GPU implementation)
		with tf.device("/cpu:0"):
			# embeddings is a CxH tensor
			embeddings = tf.get_variable('embeddings', [config.num_classes, config.num_hidden])
			# embedded is a BxSxH tensor
			embedded = tf.nn.embedding_lookup(embeddings, self._input_ids)
			# sequences is a list of length S containing Bx1xH tensors
			sequences = tf.split(embedded, self._seq_len, 1)
			# perform a "squeeze" on each item in the sequence list
			# inputs is a list of length S containing BxH tensors
			inputs = [tf.squeeze(seq, [1]) for seq in sequences]

		def make_layer_cell():
			"""Return a fresh LSTM cell, wrapped in dropout when requested."""
			layer_cell = BasicLSTMCell(config.num_hidden)
			if dropout > 0:
				keep_prob = 1 - dropout
				layer_cell = DropoutWrapper(layer_cell, output_keep_prob=keep_prob)
			return layer_cell

		# Build one cell object PER LAYER. Passing `[cell] * num_layers` hands
		# the same cell instance to every layer, which on TF >= 1.1 either
		# raises a variable-scope reuse error or ties the layers' weights.
		self._stack = MultiRNNCell([make_layer_cell() for _ in range(config.num_layers)])
		self._state = self._stack.zero_state(self._batch_size, tf.float32)

		# Pump the inputs through the RNN layers
		# outputs is a list of length S containing BxH tensors
		outputs, self._state = static_rnn(self._stack, inputs, initial_state=self._state)

		# Softmax weight tensor is HxC
		W_soft = tf.get_variable('W_soft', [config.num_hidden, config.num_classes])
		# Softmax bias tensor is Cx1
		b_soft = tf.get_variable('b_soft', [config.num_classes])

		# Reshape the output so that we can use it with the softmax weights and bias:
		# 	- concat makes list into a BxSH tensor,
		# 	- reshape converts the BxSH tensor into a BSxH tensor
		output = tf.reshape(tf.concat(outputs, 1), [-1, config.num_hidden])

		# logits is a (BSxH).(HxC) + 1xC = BSxC + 1xC = BSxC tensor
		logits = tf.nn.xw_plus_b(output, W_soft, b_soft)

		# probs is a BSxC tensor, with entry (i,j) containing the probability that batch i is class j
		self._probs = tf.nn.softmax(logits)

		# targets is a BSx1 tensor
		targets = tf.reshape(self._target_ids, [self._batch_size*self._seq_len])
		# cross_entropy is a BSx1 tensor
		cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)

		# loss is a scalar containing the mean of cross_entropy losses
		self._loss = tf.reduce_mean(cross_entropy)
Ejemplo n.º 22
0
    def __init__(self, is_training, config, input_):
        """
        Build the Bayesian LSTM graph from the hyperparameters in ``config``.

        The graph stacks ``config.num_layers`` Bayesian LSTM cells, unrolls
        them over ``input_.num_steps`` time steps, and applies a dense softmax
        output layer whose weights are sampled from the variational posterior.

        When ``is_training`` is false, construction stops after the data cost
        is defined: no KL term, total loss or optimizer is added to the graph.

        Parameters
        ----------
            is_training : boolean
                Whether to also build the KL loss and the training op.
            config : object
                Hyperparameters. Attributes read here: batch_size,
                Y_cardinality, X_dim, embedding, prior_pi, log_sigma1,
                log_sigma2, num_layers, hidden_size, max_grad_norm.
            input_ : object
                Input pipeline. Attributes read here: num_steps, input_data,
                targets, epoch_size.
        """
        # Whether the model is being used for training
        self._is_training = is_training
        self._input = input_

        # Batch / chain properties
        self.batch_size = config.batch_size
        self.num_steps = input_.num_steps
        self._input_data = input_.input_data
        self._targets = input_.targets

        input_data_ids = input_.input_data
        # Bind the targets unconditionally: they are used for logging and for
        # the loss below regardless of whether the inputs are embedded.
        # (Previously they were only bound in the non-embedding branch, so the
        # embedding path failed with an unbound local variable.)
        targets = input_.targets

        # Dimensionality of the output; for classification, the number of classes
        Y_cardinality = config.Y_cardinality
        # Dimensionality of each input element fed to the first LSTM layer
        X_dim = config.X_dim

        # Construct prior
        prior = VI.Prior(config.prior_pi, config.log_sigma1, config.log_sigma2)

        ########################################################################
        #########  Transform categorical values into real-valued vectors  ######
        ########################################################################
        # With discrete inputs, embed them into vectors of size X_dim;
        # otherwise the inputs are used as they are.
        # NOTE: the explicit comparison with True is kept on purpose so that
        # the set of config values enabling the embedding does not change.
        if (config.embedding == True):
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                        "embedding", [Y_cardinality, config.X_dim], dtype=VI.data_type())
                inputs = tf.nn.embedding_lookup(embedding, input_data_ids)
        else:
            inputs = input_data_ids

        # Chains in the batch, as a 3D tensor with dimensions
        #     - number of chains in the batch
        #     - num_steps: number of elements of each chain
        #     - D: dimensionality of the elements of the chain
        # TODO: all inputs are given when defining the model; ideally the
        #       chains would be preprocessed beforehand and fed as placeholders.
        input_chains = inputs[:, :, :]

        print ("-----------------------------")
        print ("Input Batch X shape", inputs.shape)
        print ("Input Batch Y shape", targets.shape)
        print ("Input_size: %i"%X_dim)
        print ("Output_size: %i"%Y_cardinality)
        print ("Number of chains in a batch: %i"%self.batch_size)
        print ("Number of elements in a chain: %i"%self.num_steps)
        print ("Number of hidden state neurons LTSM: %i"%config.hidden_size)

        ########################################################################
        ############# Start building the architecture of the network ###########
        ########################################################################

        ################  Build and stack BBB LSTM cells ################
        cells = []
        for i in range(config.num_layers):
            # The first layer consumes the inputs; deeper layers consume the
            # hidden state of the layer below.
            if (i == 0):
                LSTM_input_size = X_dim
            else:
                LSTM_input_size = config.hidden_size

            cells.append(BLC.BayesianLSTMCell(LSTM_input_size, config.hidden_size, prior, is_training,
                                      forget_bias=0.0,
                                      name="bbb_lstm_{}".format(i)))
        # Stack the LSTM cells together. state_is_tuple=True since each LSTM
        # cell has 2 states, C_t and h_t.
        DeepLSTMRNN = MultiRNNCell(cells, state_is_tuple=True)

        ################  Propagate the chains in the batch from input to output ################

        # Zero initial state; it needs the batch size because all chains of
        # the batch are propagated in parallel.
        self._initial_state = DeepLSTMRNN.zero_state(config.batch_size, VI.data_type())
        state = self._initial_state

        # Forward pass for the truncated mini-batch.
        # hs_o collects, for every time step, the output of the last LSTM of
        # the stack; it is later converted to the real output to compute the
        # cost. All chains must have the same length.
        hs_o = []
        with tf.variable_scope("RNN"):        # All the LSTMs live under the scope RNN.
            for time_step in range(self.num_steps):  # For each element in the chain
                if (time_step > 0):
                    # Reuse the variables created at the first time step
                    tf.get_variable_scope().reuse_variables()

                # Feed the time_step-th element of every chain at once
                (cell_output, state) = DeepLSTMRNN(input_chains[:,time_step,:], state)
                hs_o.append(cell_output)
        print (["size output state LSTM", cell_output.shape])

        # Concatenate the per-step outputs and flatten them to rows of size
        # hidden_size: every row is multiplied by the same weights of the
        # dense softmax layer.
        hs_o = tf.reshape( tf.concat(hs_o, 1), [-1, config.hidden_size])

        print (["Size of the Concatenated output state of the last LSTM for all chains in batch and time-steps in a batch", hs_o.shape])

        ################  Build the output layer ############################

        # The output layer is a dense layer that transforms the hidden space
        # of the last LSTM into the prediction of each discrete output,
        # followed by a softmax. Its weights and biases are sampled from the
        # variational posterior.
        softmax_w = VI.sample_posterior((config.hidden_size  , Y_cardinality), "softmax_w", prior, is_training)
        softmax_b = VI.sample_posterior((Y_cardinality, 1), "softmax_b", prior, is_training)

        print ("Shape of the weights of the output Dense layer",softmax_w.shape)
        print ("Shape of the weights of the output Dense layer",softmax_b.shape)

        # Logits are the input to the softmax layer
        logits = tf.nn.xw_plus_b(hs_o, softmax_w, tf.squeeze(softmax_b))

        print ("Shape of logits after multiplication of ohs", logits.shape)
        # Reshape back to the form [chain, step, output]
        logits = tf.reshape(logits, [self.batch_size, self.num_steps, Y_cardinality])
        print ("Shape of logits after reshpaing", logits.shape)

        # Output of the batch: for each of the self.batch_size chains and each
        # of its self.num_steps elements, the probability of every output class.
        # TODO: maybe do not build this in the training model to save computation?
        self._output =  tf.nn.softmax(logits)

        #####################################################################################
        ################  Setting the Loss function  ################
        #####################################################################################

        # B = number of batches aka the epoch size
        # C = number of truncated sequences in a batch aka batch_size variable
        B = self._input.epoch_size
        C = self.batch_size

        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            targets,
            tf.ones([self.batch_size, self.num_steps], dtype=VI.data_type()),
            average_across_timesteps=False,
            average_across_batch=False)

        # Update the cost; remember to divide by batch size
        self._cost = tf.reduce_sum(loss) / self.batch_size
        self._kl_loss = 0.
        self._final_state = state

        if not is_training:
            return

        # KL divergence accumulated while sampling the variational posterior
        kl_loss = tf.add_n(tf.get_collection("KL_layers"), "kl_divergence")

        self._kl_loss =  kl_loss /(B*C)

        # Final loss: the data cost plus the scaled KL divergence of the posterior
        self._total_loss = self._cost + self._kl_loss

        #####################################################################################
        ################  Setting the training algorithm  ################
        #####################################################################################

        # Gradients of the total loss w.r.t. all trainable variables, clipped
        # by global norm and applied with plain gradient descent.
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self._total_loss, tvars),
                                          config.max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        # Placeholder + assign op used to change the learning rate from outside
        self._new_lr = tf.placeholder(VI.data_type(), shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
Ejemplo n.º 23
0
    def create_model(self):
        """Build the question/document answer-span graph and attach it to ``self``.

        The graph consists of, in order: input placeholders, word embeddings
        (optionally concatenated with convolutional character features),
        bidirectional question and document encoders, a context-to-question
        attention layer, a bidirectional modelling layer and an output layer
        producing one distribution over document positions for the answer
        start and one for the answer end.

        Reads from ``self``: ``args`` (``num_layers``, ``hidden_size``,
        ``use_lstm``, ``use_char_embedding``, ``keep_prob``), ``q_len``,
        ``d_len``, ``q_char_len``, ``d_char_len``, ``embedding_matrix`` and,
        when character embeddings are enabled, ``char_embedding_matrix``.

        Sets on ``self``: ``result_s``, ``result_e``, ``answer_s``,
        ``answer_e``, ``p1``, ``p2``, ``loss``, ``correct_prediction``,
        ``begin_acc`` and ``end_acc``.

        The question/document placeholders are not stored on ``self``; they
        can only be fed through their graph names (``questions_bt``,
        ``documents_bt``, ``questions_bt_char``, ``documents_bt_char``).
        """
        num_layers = self.args.num_layers
        hidden_size = self.args.hidden_size
        cell = LSTMCell if self.args.use_lstm else GRUCell

        # ---- Inputs -------------------------------------------------------
        q_input = tf.placeholder(dtype=tf.int32,
                                 shape=[None, self.q_len],
                                 name='questions_bt')
        d_input = tf.placeholder(dtype=tf.int32,
                                 shape=[None, self.d_len],
                                 name='documents_bt')
        # The labels are reduced with argmax over the last axis further down,
        # so they are presumably one-hot rows over document positions.
        answer_s = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None],
                                  name='answer_start')
        answer_e = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None],
                                  name='answer_end')
        q_input_char = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.q_len, self.q_char_len],
            name='questions_bt_char')
        d_input_char = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.d_len, self.d_char_len],
            name='documents_bt_char')

        # Word embeddings are initialised from the supplied matrix and remain
        # trainable. The variable name (typo included) is kept unchanged so
        # that existing checkpoints still load.
        init_embed = tf.constant(self.embedding_matrix, dtype=tf.float32)
        embedding_matrix = tf.get_variable(name='embdding_matrix',
                                           initializer=init_embed,
                                           dtype=tf.float32)

        # Sequence length = number of non-zero token ids, i.e. id 0 is
        # treated as padding.
        q_real_len = tf.reduce_sum(tf.sign(tf.abs(q_input)), axis=1)
        d_real_len = tf.reduce_sum(tf.sign(tf.abs(d_input)), axis=1)
        d_mask = tf.sequence_mask(dtype=tf.float32,
                                  maxlen=self.d_len,
                                  lengths=d_real_len)
        # The question mask must follow the *question* lengths; building it
        # from the document lengths lets padded question tokens take part in
        # the attention softmax.
        q_mask = tf.sequence_mask(dtype=tf.float32,
                                  maxlen=self.q_len,
                                  lengths=q_real_len)
        _EPSILON = 10e-8

        batch_size = tf.shape(q_input)[0]

        # ---- Optional character-level features ------------------------------
        if self.args.use_char_embedding:
            char_embedding = tf.get_variable(name='can_embdding_matrix',
                                             initializer=tf.constant(
                                                 self.char_embedding_matrix,
                                                 dtype=tf.float32),
                                             dtype=tf.float32,
                                             trainable=True)

            with tf.variable_scope('char_embedding', reuse=tf.AUTO_REUSE):
                q_char_embed = tf.nn.embedding_lookup(
                    char_embedding, q_input_char)  # B * Q * C * emb
                d_char_embed = tf.nn.embedding_lookup(
                    char_embedding, d_input_char)  # B * D * C * emb

                q_char_embed = tf.nn.dropout(q_char_embed,
                                             keep_prob=self.args.keep_prob)
                d_char_embed = tf.nn.dropout(d_char_embed,
                                             keep_prob=self.args.keep_prob)
                with tf.variable_scope('char_conv', reuse=tf.AUTO_REUSE):
                    # Move the word axis last so that every word position is
                    # a separate convolution channel:
                    # [B, Q, C, emb] -> [B, C, emb, Q].
                    q_char_embed = tf.transpose(
                        q_char_embed,
                        perm=[0, 2, 3, 1])  # [batch, height, width, channels]
                    q_filter = tf.get_variable(
                        'q_filter_w', shape=[5, 5, self.q_len, self.q_len]
                    )  # [filter_height, filter_width, in_channels, out_channels]
                    q_conv = tf.nn.conv2d(
                        q_char_embed,
                        q_filter,
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        use_cudnn_on_gpu=True,
                        data_format="NHWC",
                        name=None)  # [B, C - 4, emb - 4, Q]

                    q_char_embed_out = tf.nn.max_pool(q_conv,
                                                      ksize=[1, 5, 5, 1],
                                                      strides=[1, 1, 1, 1],
                                                      padding='VALID',
                                                      data_format="NHWC",
                                                      name=None)

                    # Flatten the pooled height x width map into a single
                    # feature vector per question word.
                    q_pooled_shape = q_char_embed_out.get_shape().as_list()
                    q_char_out_size = q_pooled_shape[1] * q_pooled_shape[2]
                    q_char_embed_out = tf.reshape(
                        tf.transpose(q_char_embed_out, perm=[0, 3, 1, 2]),
                        shape=[batch_size, self.q_len, q_char_out_size])

                    # Same pipeline for the document, with its own filter.
                    d_char_embed = tf.transpose(
                        d_char_embed,
                        perm=[0, 2, 3, 1])  # [batch, height, width, channels]
                    d_filter = tf.get_variable(
                        'd_filter_w', shape=[5, 5, self.d_len, self.d_len]
                    )  # [filter_height, filter_width, in_channels, out_channels]
                    d_conv = tf.nn.conv2d(
                        d_char_embed,
                        d_filter,
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        use_cudnn_on_gpu=True,
                        data_format="NHWC",
                        name=None)  # [B, C - 4, emb - 4, D]

                    d_char_embed_out = tf.nn.max_pool(d_conv,
                                                      ksize=[1, 5, 5, 1],
                                                      strides=[1, 1, 1, 1],
                                                      padding='VALID',
                                                      data_format="NHWC",
                                                      name=None)
                    d_pooled_shape = d_char_embed_out.get_shape().as_list()
                    d_char_out_size = d_pooled_shape[1] * d_pooled_shape[2]
                    d_char_embed_out = tf.reshape(
                        tf.transpose(d_char_embed_out, perm=[0, 3, 1, 2]),
                        shape=[batch_size, self.d_len, d_char_out_size])

                # Single-tensor concat kept from the original code; with only
                # one input there is nothing to join.
                d_char_out = tf.concat(d_char_embed_out, -1)
                q_char_out = tf.concat(q_char_embed_out, -1)

        # ---- Question encoder -----------------------------------------------
        with tf.variable_scope('q_encoder'):
            q_embed = tf.nn.embedding_lookup(embedding_matrix, q_input)

            if self.args.use_char_embedding:
                q_embed = tf.concat([q_embed, q_char_out], -1)
            q_rnn_f = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])
            q_rnn_b = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])

            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=q_rnn_f,
                cell_bw=q_rnn_b,
                inputs=q_embed,
                sequence_length=q_real_len,
                initial_state_bw=None,
                dtype="float32",
                parallel_iterations=None,
                swap_memory=True,
                time_major=False,
                scope=None)

            # Per-step forward and backward outputs side by side:
            # B * Q * hidden*2.
            q_emb_bi = tf.concat(outputs, axis=-1)

            logger("q_encoded_bf shape {}".format(q_emb_bi.get_shape()))

        # ---- Document encoder -----------------------------------------------
        with tf.variable_scope('d_encoder'):
            d_embed = tf.nn.embedding_lookup(embedding_matrix, d_input)

            if self.args.use_char_embedding:
                d_embed = tf.concat([d_embed, d_char_out], -1)

            d_rnn_f = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])
            d_rnn_b = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
                for _ in range(num_layers)
            ])

            d_rnn_out, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_bw=d_rnn_b,
                cell_fw=d_rnn_f,
                inputs=d_embed,
                sequence_length=d_real_len,
                swap_memory=True,
                dtype="float32",
            )
            d_emb_bi = tf.concat(d_rnn_out, axis=-1)  # B * D * hidden*2
            logger("d_encoded_bf shape {}".format(d_emb_bi.get_shape()))

        # ---- Context-to-question attention ----------------------------------
        with tf.variable_scope('ctq_att'):
            # Each (document word, question word) pair is scored from
            # [d; q; d * q], i.e. 3 * (hidden*2) = hidden*6 features.
            ctq_w = tf.get_variable(shape=[hidden_size * 6, 1], name='ctq_w')
            d_expanded = tf.tile(tf.expand_dims(d_emb_bi, 2),
                                 [1, 1, self.q_len, 1])
            q_expanded = tf.tile(tf.expand_dims(q_emb_bi, 1),
                                 [1, self.d_len, 1, 1])
            dq_dot = tf.concat(
                [d_expanded, q_expanded, d_expanded * q_expanded], axis=-1)
            dq_dot = tf.squeeze(tf.tensordot(dq_dot,
                                             ctq_w,
                                             axes=((-1, ), (0, ))),
                                axis=-1)  # B * D * Q
            # Normalise over the question axis; softmax_with_mask is
            # presumably a softmax that ignores positions where mask == 0.
            dq_dot_softmax = self.softmax_with_mask(
                logits=dq_dot,
                axis=2,
                mask=tf.tile(tf.expand_dims(q_mask, axis=1),
                             [1, self.d_len, 1]))  # B * D * Q
            U_hat = tf.einsum("bij,bjk->bik", dq_dot_softmax,
                              q_emb_bi)  # B * D * hidden*2
            # One weight per document word, taken from its best-matching
            # question word.
            max_atten = self.softmax_with_mask(tf.reduce_max(dq_dot, axis=-1),
                                               mask=d_mask,
                                               axis=-1)  # B * D
            # Attention-weighted document summary, repeated at every document
            # position.
            H_hat = tf.tile(
                tf.expand_dims(tf.reduce_sum(
                    tf.multiply(tf.expand_dims(max_atten, axis=-1), d_emb_bi),
                    1),
                               axis=1),
                [1, self.d_len, 1])  # B * D * hidden*2,

            G_belta = tf.concat(
                [d_emb_bi, U_hat, d_emb_bi * U_hat, d_emb_bi * H_hat],
                axis=-1)  # B * D * hidden*8

        # ---- Modelling layer ------------------------------------------------
        with tf.variable_scope('model_layer'):
            model_cell_f = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
            ])
            model_cell_b = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
            ])

            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=model_cell_f,
                cell_bw=model_cell_b,
                inputs=G_belta,
                sequence_length=d_real_len,
                swap_memory=True,
                dtype='float32')
            M = tf.concat(outputs, axis=-1)  # B * D * hidden*2

        # ---- Output layer ---------------------------------------------------
        with tf.variable_scope('output_layer'):
            # [G_belta; M] has hidden*8 + hidden*2 = hidden*10 features.
            w_p_1 = tf.get_variable('w_p_1', shape=[hidden_size * 10, 1])
            out_cell_f = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
            ])
            out_cell_b = MultiRNNCell(cells=[
                DropoutWrapper(cell(hidden_size),
                               output_keep_prob=self.args.keep_prob)
            ])

            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=out_cell_f,
                cell_bw=out_cell_b,
                inputs=M,
                sequence_length=d_real_len,
                dtype='float32')
            M_2 = tf.concat(outputs, axis=-1)
            w_p_2 = tf.get_variable('w_p_2', shape=[hidden_size * 10, 1])

            # Answer-start scores over document positions.
            p1 = self.softmax_with_mask(logits=tf.reshape(
                tf.matmul(
                    tf.reshape(tf.concat([G_belta, M], -1),
                               [-1, hidden_size * 10]), w_p_1),
                [-1, self.d_len]),
                                        axis=-1,
                                        mask=d_mask)
            self.result_s = p1
            # Answer-end scores, computed from the second-pass encoding M_2.
            p2 = self.softmax_with_mask(logits=tf.reshape(
                tf.matmul(
                    tf.reshape(tf.concat([G_belta, M_2], -1),
                               [-1, hidden_size * 10]), w_p_2),
                [-1, self.d_len]),
                                        axis=-1,
                                        mask=d_mask)
            self.result_e = p2

        # ---- Loss and metrics -----------------------------------------------
        self.answer_s = answer_s
        self.answer_e = answer_e
        epsilon = tf.convert_to_tensor(_EPSILON,
                                       p1.dtype.base_dtype,
                                       name="epsilon")
        p1 = tf.clip_by_value(p1, epsilon, 1. - epsilon)
        p2 = tf.clip_by_value(p2, epsilon, 1. - epsilon)
        self.p1 = p1
        self.p2 = p2
        # NOTE(review): p1/p2 look like already-normalised (and clipped)
        # probabilities, yet they are passed as `logits`, so the op applies a
        # softmax on top of them. Left unchanged because altering it changes
        # training behaviour -- confirm whether this is intended.
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.p1, labels=tf.argmax(self.answer_s, -1))
        losses += tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.p2, labels=tf.argmax(self.answer_e, -1))
        self.loss = tf.reduce_mean(losses)

        # Number of examples in the batch whose predicted start AND end both
        # match the labels (a count, not a ratio).
        self.correct_prediction = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.logical_and(
                    tf.equal(
                        tf.argmax(self.answer_s, 1, output_type=tf.int32),
                        tf.argmax(self.result_s, -1, output_type=tf.int32)),
                    tf.equal(
                        tf.argmax(self.answer_e, 1, output_type=tf.int32),
                        tf.argmax(self.result_e, -1, output_type=tf.int32))),
                        dtype='float')))

        # Number of examples with a correct start position.
        self.begin_acc = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.equal(
                    tf.argmax(self.answer_s, 1, output_type=tf.int32),
                    tf.argmax(self.result_s, -1, output_type=tf.int32)),
                        dtype='float')))
        # Number of examples with a correct end position.
        self.end_acc = tf.reduce_sum(
            tf.sign(
                tf.cast(tf.equal(
                    tf.argmax(self.answer_e, 1, output_type=tf.int32),
                    tf.argmax(self.result_e, -1, output_type=tf.int32)),
                        dtype='float')))
Ejemplo n.º 24
0
    def initialize(self,
                   inputs,
                   input_lengths,
                   mel_targets=None,
                   linear_targets=None,
                   pml_targets=None,
                   gta=False,
                   locked_alignments=None,
                   logs_enabled=True):
        '''Initializes the model for inference.

        Sets the "inputs", "input_lengths", "pml_outputs", "alignments" and
        "pml_targets" fields.

        Args:
          inputs: int32 Tensor with shape [N, T_in] where N is batch size, T_in is number of
            steps in the input time series, and values are character IDs
          input_lengths: int32 Tensor with shape [N] where N is batch size and values are the lengths
            of each sequence in inputs.
          mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number
            of steps in the output time series, M is num_mels, and values are entries in the mel
            spectrogram. Accepted but not used by this method.
          linear_targets: float32 Tensor with shape [N, T_out, F] where N is batch_size, T_out is number
            of steps in the output time series, F is num_freq, and values are entries in the linear
            spectrogram. Accepted but not used by this method.
          pml_targets: float32 Tensor with shape [N, T_out, P] where N is batch_size, T_out is number of
            steps in the PML vocoder features trajectories, P is pml_dimension, and values are PML vocoder
            features. Only needed for training; the graph is built in training mode
            exactly when this argument is not None.
          gta: boolean flag that is set to True when ground truth alignment is required; it selects the
            training helper (which is given pml_targets) even when not training
          locked_alignments: when explicit attention alignment is required, the locked alignments are passed in this
            parameter and the attention alignments are locked to these values. Accepted but not used by
            this method.
          logs_enabled: boolean flag that defaults to True, if False no construction logs output
        '''
        with tf.variable_scope('inference') as scope:
            # Training mode is inferred from the presence of targets.
            is_training = pml_targets is not None
            batch_size = tf.shape(inputs)[0]
            hp = self._hparams

            # Embeddings
            embedding_table = tf.get_variable(
                'embedding', [len(symbols), hp.embed_depth],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))
            embedded_inputs = tf.nn.embedding_lookup(
                embedding_table, inputs)  # [N, T_in, embed_depth=256]

            # Encoder
            prenet_outputs = prenet(
                embedded_inputs, is_training,
                hp.prenet_depths)  # [N, T_in, prenet_depths[-1]=128]
            encoder_outputs = encoder_cbhg(
                prenet_outputs,
                input_lengths,
                is_training,  # [N, T_in, encoder_depth=256]
                hp.encoder_depth)

            # Attention
            attention_cell = AttentionWrapper(
                GRUCell(hp.attention_depth),
                BahdanauAttention(hp.attention_depth, encoder_outputs),
                alignment_history=True,
                output_attention=False)  # [N, T_in, attention_depth=256]

            # Apply prenet before concatenation in AttentionWrapper.
            attention_cell = DecoderPrenetWrapper(attention_cell, is_training,
                                                  hp.prenet_depths)

            # Concatenate attention context vector and RNN cell output into a 2*attention_depth=512D vector.
            concat_cell = ConcatOutputAndAttentionWrapper(
                attention_cell)  # [N, T_in, 2*attention_depth=512]

            # Decoder (layers specified bottom to top):
            decoder_cell = MultiRNNCell(
                [
                    OutputProjectionWrapper(concat_cell, hp.decoder_depth),
                    ResidualWrapper(GRUCell(hp.decoder_depth)),
                    ResidualWrapper(GRUCell(hp.decoder_depth))
                ],
                state_is_tuple=True)  # [N, T_in, decoder_depth=256]

            # Project onto r PML feature vectors (predict r outputs at each RNN step):
            output_cell = OutputProjectionWrapper(
                decoder_cell, hp.pml_dimension * hp.outputs_per_step)
            decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                        dtype=tf.float32)

            # The training helper receives the target frames; the test helper
            # is built from the batch size and output dimensions only.
            if is_training or gta:
                helper = TacoTrainingHelper(inputs, pml_targets,
                                            hp.pml_dimension,
                                            hp.outputs_per_step)
            else:
                helper = TacoTestHelper(batch_size, hp.pml_dimension,
                                        hp.outputs_per_step)

            (multi_decoder_outputs,
             _), final_decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                 BasicDecoder(output_cell, helper, decoder_init_state),
                 maximum_iterations=hp.max_iters)  # [N, T_out/r, P*r]

            # Reshape outputs to be one output per entry
            decoder_outputs = tf.reshape(
                multi_decoder_outputs,
                [batch_size, -1, hp.pml_dimension])  # [N, T_out, P]

            # Postnet: predicts a residual
            postnet_outputs = postnet(decoder_outputs,
                                      layers=hp.postnet_conv_layers,
                                      conv_width=hp.postnet_conv_width,
                                      channels=hp.postnet_conv_channels,
                                      is_training=is_training)

            pml_outputs = decoder_outputs + postnet_outputs

            # Grab alignments from the final decoder state:
            # element 0 is the state of the bottom decoder layer, which wraps
            # the attention cell and therefore carries the alignment history.
            alignments = tf.transpose(
                final_decoder_state[0].alignment_history.stack(), [1, 2, 0])

            self.inputs = inputs
            self.input_lengths = input_lengths
            self.pml_outputs = pml_outputs
            self.alignments = alignments
            self.pml_targets = pml_targets

            # Honour the documented contract: no construction logs are
            # written when the caller passes logs_enabled=False.
            if logs_enabled:
                log('Initialized Tacotron model. Dimensions: ')
                log('  embedding:               %d' % embedded_inputs.shape[-1])
                log('  prenet out:              %d' % prenet_outputs.shape[-1])
                log('  encoder out:             %d' % encoder_outputs.shape[-1])
                log('  attention out:           %d' % attention_cell.output_size)
                log('  concat attn & out:       %d' % concat_cell.output_size)
                log('  decoder cell out:        %d' % decoder_cell.output_size)
                log('  decoder out (%d frames):  %d' %
                    (hp.outputs_per_step, multi_decoder_outputs.shape[-1]))
                log('  decoder out (1 frame):   %d' % pml_outputs.shape[-1])
Ejemplo n.º 25
0
    def __init__(self, is_training, config, input_):
        """Build the graph of a stacked BBB (Bayesian) LSTM sequence model.

        Args:
            is_training: when falsy, construction stops once the data cost
                and the final state have been defined; neither the KL term
                nor the optimizer ops are created.
            config: settings object; this method reads ``X_dim``,
                ``hidden_size``, ``vocab_size``, ``num_layers``,
                ``batch_size``, ``max_grad_norm``, ``prior_pi``,
                ``log_sigma1`` and ``log_sigma2``.
            input_: data object; this method reads ``batch_size``,
                ``num_steps``, ``input_data``, ``targets`` and, when
                training, ``epoch_size``.
        """
        self._is_training = is_training
        self._input = input_
        self.batch_size = input_.batch_size
        self.num_steps = input_.num_steps
        self._input_data = input_.input_data
        self._targets = input_.targets

        input_dim = config.X_dim
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        # Prior shared by every sampled weight in the network.
        prior = Prior(config.prior_pi, config.log_sigma1, config.log_sigma2)

        features = input_.input_data

        # Only the bottom layer sees the raw feature dimension; every layer
        # above consumes the hidden state of the layer below it.
        layer_cells = [
            BayesianLSTMCell(input_dim if layer_idx == 0 else hidden_size,
                             hidden_size,
                             prior,
                             is_training,
                             forget_bias=0.0,
                             name="bbb_lstm_{}".format(layer_idx))
            for layer_idx in range(config.num_layers)
        ]
        stacked_cell = MultiRNNCell(layer_cells, state_is_tuple=True)

        self._initial_state = stacked_cell.zero_state(config.batch_size,
                                                      data_type())
        state = self._initial_state

        # Statically unrolled forward pass over the truncated sequence; the
        # variables created at step 0 are reused by all later steps.
        step_outputs = []
        with tf.variable_scope("RNN"):
            for time_step in range(self.num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, state = stacked_cell(features[:, time_step, :],
                                                  state)
                step_outputs.append(step_output)
        flat_output = tf.reshape(tf.concat(step_outputs, 1),
                                 [-1, hidden_size])

        # Output projection whose weights are sampled from the posterior.
        softmax_w = sample_posterior((hidden_size, vocab_size), "softmax_w",
                                     prior, is_training)
        softmax_b = sample_posterior((vocab_size, 1), "softmax_b", prior,
                                     is_training)

        flat_logits = tf.nn.xw_plus_b(flat_output, softmax_w,
                                      tf.squeeze(softmax_b))
        logits = tf.reshape(flat_logits,
                            [self.batch_size, self.num_steps, vocab_size])

        self._output = tf.nn.softmax(logits)

        # All positions carry weight 1 and nothing is averaged inside
        # sequence_loss, so the result holds one loss per (example, step).
        loss_weights = tf.ones([self.batch_size, self.num_steps],
                               dtype=data_type())
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            loss_weights,
            average_across_timesteps=False,
            average_across_batch=False)

        # Data cost: total loss divided by the batch size.
        self._cost = tf.reduce_sum(loss) / self.batch_size
        self._kl_loss = 0.
        self._final_state = state

        if not is_training:
            return

        # KL term: sum of everything registered in the "KL_layers"
        # collection, scaled by 1 / (B * C), where
        #   B = number of batches, i.e. the epoch size
        #   C = number of truncated sequences in a batch, i.e. batch_size
        B = self._input.epoch_size
        C = self.batch_size
        kl_sum = tf.add_n(tf.get_collection("KL_layers"),
                          name="kl_divergence")
        self._kl_loss = (1.0 / (B * C)) * kl_sum

        self._total_loss = self._cost + self._kl_loss

        # Plain SGD on globally clipped gradients; the learning rate is a
        # non-trainable variable updated through the assign op defined below.
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self._total_loss, tvars)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        grads_and_vars = zip(clipped_grads, tvars)
        global_step = tf.contrib.framework.get_or_create_global_step()
        self._train_op = optimizer.apply_gradients(grads_and_vars,
                                                   global_step=global_step)

        self._new_lr = tf.placeholder(data_type(),
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
    def __init__(self, inp, inp_mask, decode_time_steps, ctr_flag, ctr_attention, hyper_params=None, name='Tacotron'):
        """
        Build the computational graph.
        :param inp:
        :param inp_mask:
        :param decode_time_steps:
        :param hyper_params:
        :param name:
        """
        super(Tacotron, self).__init__(name)
        self.hyper_params = HyperParams() if hyper_params is None else hyper_params

        with tf.variable_scope(name):
            self.global_step = tf.Variable(0, name='global_step', trainable=False)

            batch_size = tf.shape(inp)[0]
            input_time_steps = tf.shape(inp)[1]
            reduc = self.hyper_params.reduction_rate
            output_time_steps = decode_time_steps * reduc

            ### Encoder [begin]
            with tf.variable_scope('character_embedding'):
                embed_inp = EmbeddingLayer(self.hyper_params.embed_class, self.hyper_params.embed_dim)(inp)
            with tf.variable_scope("changeToVarible"):
                self.single_style_token = tf.get_variable('style_token', (1, self.hyper_params.styles_kind, self.hyper_params.style_dim), dtype=tf.float32)
                self.style_token = tf.tile(self.single_style_token, (batch_size, 1, 1))
            with tf.variable_scope('encoder_pre_net'):
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(embed_inp, 256, tf.nn.relu), training=False)
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(pre_ed_inp, 128, tf.nn.relu), training=False)
            encoder_output = modules.cbhg(pre_ed_inp, training=False, k=16, bank_filters=128,
                                          projection_filters=(128, 128), highway_layers=4, highway_units=128,
                                          bi_gru_units=128, sequence_length=inp_mask,
                                          name='encoder_cbhg', reuse=False)

            with tf.variable_scope('post_text'):
                all_outputs, _ = tf.nn.dynamic_rnn(cell=GRUCell(256), inputs=encoder_output, sequence_length=inp_mask,
                                               dtype=encoder_output.dtype, parallel_iterations=unkonwn_parallel_iterations)
                all_outputs = tf.transpose(all_outputs, [1, 0, 2])
                static_encoder_output = all_outputs[-1]
            ### Encoder [end]

            ### Attention Module
            with tf.variable_scope('attention'):
                att_module = AttentionModule(256, encoder_output, sequence_length=inp_mask, time_major=False)
            with tf.variable_scope("attention_style"):
                att_module_style = AttentionModule(256, self.style_token, time_major=False)

            ### Decoder [begin]
            att_cell = GRUCell(256)
            dec_cell = MultiRNNCell([ResidualWrapper(GRUCell(256)) for _ in range(2)])
            # prepare output alpha TensorArray
            with tf.variable_scope('prepare_decode'):
                # prepare output alpha TensorArray
                reduced_time_steps = tf.div(output_time_steps, reduc)
                init_att_cell_state = att_cell.zero_state(batch_size, tf.float32)
                init_dec_cell_state = dec_cell.zero_state(batch_size, tf.float32)
                init_state_tup = tuple([init_att_cell_state, init_dec_cell_state])
                init_output_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
                init_alpha_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
                init_weight_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
                init_weight_per_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
                init_alpha_style_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
                go_array = tf.zeros([batch_size, self.hyper_params.seq2seq_dim], dtype=tf.float32)
                init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
                init_context_style = tf.zeros([batch_size, 256], dtype=tf.float32)
                init_time = tf.constant(0, dtype=tf.int32)
            cond = lambda x, *_: tf.less(x, reduced_time_steps)
            def body(this_time, old_output_ta, old_alpha_ta, old_alpha_style_ta, old_weight_ta, old_weight_per_ta,
                     old_state_tup, last_context, last_context_style, last_output):
                with tf.variable_scope('decoder_pre_net'):
                    dec_pre_ed_inp = last_output
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(dec_pre_ed_inp, 256, tf.nn.relu), training=False)
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(dec_pre_ed_inp, 128, tf.nn.relu), training=False)
                with tf.variable_scope('attention_rnn'):
                    # dec_pre_ed_inp = tf.Print(dec_pre_ed_inp, [dec_pre_ed_inp[0]], message='dec', summarize=10)
                    att_cell_inp = tf.concat([last_context, dec_pre_ed_inp], axis=-1)
                    att_cell_out, att_cell_state = att_cell(att_cell_inp, old_state_tup[0])
                with tf.variable_scope('attention'):
                    query = att_cell_state[0]
                    context, alpha = att_module(query)
                    new_alpha_ta = old_alpha_ta.write(this_time, alpha)
                with tf.variable_scope("attention_style"):
                    query_style = att_cell_state[0]
                    context_style, alpha_style = att_module_style(query_style)
                    alpha_style = tf.cond(tf.equal(ctr_flag, 1), lambda: ctr_attention, lambda: alpha_style)
                    alpha_style = tf.Print(alpha_style, [alpha_style], message='alpha:', summarize=10)
                    context_style = tf.cond(tf.equal(ctr_flag, 1),
                                            lambda: tf.reduce_sum(tf.expand_dims(alpha_style, axis=-1) * self.style_token, axis=1),
                                            lambda: context_style)
                    context_style = tf.Print(context_style, [context_style], message='style:', summarize=10)
                    # alpha_style = ctr_attention
                    # alpha_style = tf.Print(alpha_style, [alpha_style], message='alpha', summarize=20)
                    # context_style = tf.reduce_sum(tf.expand_dims(alpha_style, axis=-1) * self.style_token, axis=1)
                    # context_style = tf.Print(context_style, [context_style], message='ctxt_style', summarize=20)
                    new_alpha_style_ta = old_alpha_style_ta.write(this_time, alpha_style)
                with tf.variable_scope("weighting"):
                    weight_input = tf.concat([static_encoder_output, dec_pre_ed_inp], axis=-1)
                    weighting = tf.layers.dense(weight_input, 2, tf.nn.sigmoid)
                    # weighting = tf.Print(weighting, [weighting[1]], message='weighting')
                    weighting = tf.nn.softmax(weighting)
                    weight_text, weight_style = tf.split(weighting, [1, 1], -1)
                    # weight_text = tf.Print(weight_text, [weight_text], message='weight_text:', summarize=20)
                    weight_style = tf.Print(weight_style, [weight_style], message='weight_style:')
                    new_weight_ta = old_weight_ta.write(this_time, weight_text)
                with tf.variable_scope('decoder_rnn'):
                    weighting_context = weight_text * context + weight_style * context_style
                    weight_per = tf.reduce_mean(tf.abs(weight_style * context_style) / (
                            tf.abs(weight_text * context) + tf.abs(weight_style * context_style)))
                    new_weight_per_ta = old_weight_per_ta.write(this_time, weight_per)
                    dec_input = tf.layers.dense(tf.concat([att_cell_out, weighting_context], axis=-1), 256)
                    # dec_input = tf.layers.dense(tf.concat([att_cell_out, context], axis=-1), 256)
                    dec_cell_out, dec_cell_state = dec_cell(dec_input, old_state_tup[1])
                    dense_out = tf.layers.dense(dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                    new_output_ta = old_output_ta.write(this_time, dense_out)
                    new_output = dense_out[:, -self.hyper_params.seq2seq_dim:]
                new_state_tup = tuple([att_cell_state, dec_cell_state])
                return tf.add(this_time, 1), new_output_ta, new_alpha_ta, new_alpha_style_ta, new_weight_ta,\
                       new_weight_per_ta, new_state_tup, context, context_style, new_output


            # run loop
            _, seq2seq_output_ta, alpha_ta, alpha_style_ta, weight_ta, weight_per_ta, *_ = tf.while_loop(cond, body, [init_time,
                                                                                                                      init_output_ta,
                                                                                                                      init_alpha_ta,
                                                                                                                      init_alpha_style_ta,
                                                                                                                      init_weight_ta,
                                                                                                                      init_weight_per_ta,
                                                                                                                      init_state_tup,
                                                                                                                      init_context,
                                                                                                                      init_context_style,
                                                                                                                      go_array
                                                                                                                      ])
            with tf.variable_scope('reshape_decode'):
                seq2seq_output = tf.reshape(seq2seq_output_ta.stack(),
                                            shape=(reduced_time_steps, batch_size, self.hyper_params.seq2seq_dim * reduc))
                seq2seq_output = tf.reshape(tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                                            shape=(batch_size, output_time_steps, self.hyper_params.seq2seq_dim))
                self.seq2seq_output = seq2seq_output

                alpha_output = tf.reshape(alpha_ta.stack(),
                                          shape=(reduced_time_steps, batch_size, input_time_steps))
                alpha_output = tf.expand_dims(tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
                self.alpha_output = alpha_output

                alpha_output_style = tf.reshape(alpha_style_ta.stack(),
                                                shape=(reduced_time_steps, batch_size, self.hyper_params.styles_kind))
                alpha_output_style = tf.expand_dims(tf.transpose(alpha_output_style, perm=(1, 0, 2)), -1)  # batch major
                self.alpha_output_style = alpha_output_style

                weight_ta = tf.reshape(weight_ta.stack(), shape=(reduced_time_steps, batch_size, 1))
                weight_ta = tf.transpose(weight_ta, perm=(1, 0, 2))
                self.weight_ta = weight_ta

                weight_per_ta = tf.reshape(weight_per_ta.stack(), shape=(reduced_time_steps, 1))
                self.weight_per_ta = weight_per_ta
            ### Decoder [end]

            ### PostNet [begin]
            post_output = modules.cbhg(seq2seq_output, training=False, k=8, bank_filters=128,
                                       projection_filters=(256, self.hyper_params.seq2seq_dim),
                                       highway_layers=4, highway_units=128,
                                       bi_gru_units=128, sequence_length=None,
                                       name='decoder_cbhg', reuse=False)
            post_output = tf.layers.dense(post_output, self.hyper_params.post_dim, name='post_linear_transform')
            self.post_output = post_output
Ejemplo n.º 27
0
    def add_model(self, inputs, type_layer):
        '''Construction of the RNN model with LSTM or GRU cells.

        Arguments:
            - inputs: tensor handed to `tf.nn.dynamic_rnn` (or to
              `tf.nn.bidirectional_dynamic_rnn`) as the input sequence.
            - type_layer: should be 'Context' or 'Questions'. It selects the
              batch size and the sequence-length placeholder; 'Questions'
              additionally creates the cells with `reuse=True`.
        Returns:
            - with `config.output_type == "output"`: the time-major RNN
              outputs gathered at index `config.len_questions - 1`.
            - with `config.output_type == "hs"`: the final hidden state
              (last layer only when stacked, the `.h` part for LSTM cells),
              as a pair when `config.hidden_bidirectional` is set.
        Raises:
            - ValueError: `type_layer` or `config.output_type` is not one of
              the supported values.
            - NotImplementedError: `config.type_cell` is neither 'LSTM' nor
              'GRU'.
        '''
        # Fail fast with a clear message instead of hitting an unbound local
        # variable after part of the graph has already been built.
        if type_layer not in ("Context", "Questions"):
            raise ValueError(
                "type_layer should be 'Context' or 'Questions', got %r" %
                (type_layer, ))
        output_type = self.config.output_type
        if output_type not in ("output", "hs"):
            raise ValueError(
                "config.output_type should be 'output' or 'hs', got %r" %
                (output_type, ))

        with tf.variable_scope(
                'Hidden-Layers',
                initializer=tf.contrib.layers.xavier_initializer()):
            reuse = type_layer == "Questions"
            initializer = tf.random_uniform_initializer(-1, 1)

            def make_cell():
                # One recurrent layer of the configured cell type.
                if self.config.type_cell == "LSTM":
                    return LSTMCell(self.config.hidden_size,
                                    initializer=initializer,
                                    reuse=reuse)
                if self.config.type_cell == "GRU":
                    return GRUCell(self.config.hidden_size,
                                   kernel_initializer=initializer,
                                   reuse=reuse)
                raise NotImplementedError

            def make_stack():
                # One direction of the network: a MultiRNNCell when several
                # hidden layers are configured, a bare cell otherwise.
                if self.config.nb_hidden_layers > 1:
                    return MultiRNNCell([
                        make_cell()
                        for _ in range(self.config.nb_hidden_layers)
                    ])
                return make_cell()

            # Forward cells are created before backward cells, as before.
            cell_fw = make_stack()
            if self.config.bidirectional:
                cell_bw = make_stack()

            if type_layer == "Context":
                batch_size = self.config.len_context
                sequence_length = self.context_len_placeholder
            else:  # "Questions" (validated above)
                batch_size = self.config.len_questions
                sequence_length = self.questions_len_placeholder

            cell_fw = DropoutWrapper(cell_fw,
                                     output_keep_prob=self.dropout_placeholder)
            initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)

            if self.config.bidirectional:
                cell_bw = DropoutWrapper(
                    cell_bw, output_keep_prob=self.dropout_placeholder)
                initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)
                outputs, hidden_states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    inputs,
                    initial_state_fw=initial_state_fw,
                    initial_state_bw=initial_state_bw,
                    sequence_length=sequence_length)
            else:
                outputs, hidden_states = tf.nn.dynamic_rnn(
                    cell_fw,
                    inputs,
                    initial_state=initial_state_fw,
                    sequence_length=sequence_length)

        if output_type == "output":
            # NOTE(review): with config.bidirectional the RNN call returns a
            # (forward, backward) pair for `outputs`; this rank-3 transpose
            # looks written for the unidirectional case only -- confirm.
            # NOTE(review): the gathered index uses len_questions even when
            # type_layer is 'Context' -- confirm this is intended.
            output = tf.transpose(outputs, [1, 0, 2])
            output = tf.gather(output, self.config.len_questions - 1)
        else:  # "hs" (validated above)
            # NOTE(review): this branch reads config.hidden_bidirectional
            # while the graph above is driven by config.bidirectional;
            # presumably both are meant to agree -- confirm.
            if self.config.hidden_bidirectional:
                output = (hidden_states[0], hidden_states[1])
                if self.config.nb_hidden_layers > 1:
                    # Keep only the last layer's state of each direction.
                    output = (output[0][-1], output[1][-1])
                if self.config.type_cell == "LSTM":
                    output = (output[0].h, output[1].h)
            else:
                output = hidden_states
                if self.config.nb_hidden_layers > 1:
                    # Keep only the last layer's state.
                    output = output[-1]
                if self.config.type_cell == "LSTM":
                    output = output.h

        return output
Ejemplo n.º 28
0
    def __init__(self, data_size, time_len, unit_size, num_layers, batch_size,
                 learning_rate, feed_previous):
        '''
        Build the graph of the basic encoder-decoder seq2seq model
        :param data_size: width of each per-step input vector; stored as
            `input_size` and also passed as `num_decoder_symbols`
        :param time_len: number of encoder/decoder placeholders to create
        :param unit_size: number of units in each LSTM layer of the model
        :param num_layers: number of LSTM layers in the model
        :param batch_size: the size of batches used during training; it is
            the first dimension of every placeholder
        :param learning_rate: initial value of the non-trainable `lr`
            variable handed to the Adam optimizer
        :param feed_previous: forwarded unchanged to `advanced_rnn_seq2seq`
        '''

        self.input_size = data_size
        self.time_len = time_len
        self.batch_size = batch_size

        # Bookkeeping variables that the optimizer must not train.
        self.learning_rate = tf.Variable(
            float(learning_rate), trainable=False, name='lr')
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        def make_lstm_layer():
            return BasicLSTMCell(unit_size)

        # A single layer is always built first; it is replaced by a stack
        # when more than one layer is requested.
        cell = make_lstm_layer()
        if num_layers > 1:
            stacked_layers = [make_lstm_layer() for _ in range(num_layers)]
            cell = MultiRNNCell(stacked_layers)

        print('state size', cell.state_size)
        zero_state = cell.zero_state(self.batch_size, dtype=tf.float32)
        print('zero state size', zero_state)

        def frame_placeholder(name_template, step):
            # One [batch_size, input_size] float placeholder for one step.
            return tf.placeholder(
                shape=[self.batch_size, self.input_size],
                name=name_template.format(step),
                dtype=tf.float32)

        # Per-step placeholders, created pairwise (encoder, then decoder).
        self.encoder_inputs = []
        self.decoder_inputs = []
        for step in range(self.time_len):
            self.encoder_inputs.append(frame_placeholder('encoder{}', step))
            self.decoder_inputs.append(frame_placeholder('decoder{}', step))

        # Reconstruction objective: the targets are the encoder inputs in
        # reversed time order.
        reversed_inputs = list(reversed(self.encoder_inputs))

        # The returned outputs are already projected from the raw LSTM outputs.
        decoded_steps, _ = advanced_rnn_seq2seq(
            encoder_inputs=self.encoder_inputs,
            decoder_inputs=self.decoder_inputs,
            cell=cell,
            num_decoder_symbols=self.input_size,
            output_projection=None,
            feed_previous=feed_previous)

        targets = tf.stack(reversed_inputs, axis=1)
        self.outputs = tf.stack(decoded_steps, axis=1)
        self.loss = tf.losses.mean_squared_error(targets, self.outputs)
        self.error_vector = tf.abs(self.outputs - targets)

        # Training op: Adam on the reconstruction loss.
        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.train_op = adam.minimize(self.loss, global_step=self.global_step)

        # Saver covering every global variable of the model.
        self.saver = tf.train.Saver(tf.global_variables())
Ejemplo n.º 29
0
# Decoder inputs: a zero-valued "GO" tensor shaped like the first encoder
# input, followed by every label except the last one.
with tf.name_scope('decode_input'):
    decode_input = [tf.zeros_like(encode_input[0], dtype=int_type, name="GO")] + labels[:-1]
    
# NOTE(review): this placeholder is not used below; the DropoutWrapper is
# given `keep_prob_val` instead -- confirm which one is intended.
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name='keep_prob')


# In[5]:

# One BasicLSTMCell per layer, each wrapped with dropout on its outputs.
cells = [
    DropoutWrapper(
        BasicLSTMCell(num_hidden), output_keep_prob=keep_prob_val
    ) for i in range(num_layers)
]

stacked_lstm = MultiRNNCell(cells)

with tf.variable_scope("decoders") as scope:
    # First call: decoder is fed `decode_input`.
    decode_outputs, decode_state = seq2seq.embedding_attention_seq2seq(encode_input, decode_input, stacked_lstm, vocab_size, vocab_size, num_hidden, dtype=float_type)

    # Switch the scope to reuse mode so the second call below shares the
    # variables created by the first call.
    scope.reuse_variables()

    # Second call: same arguments plus feed_previous=True.
    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(encode_input, decode_input, stacked_lstm, vocab_size, vocab_size, num_hidden, dtype=float_type, feed_previous=True)
    

# In[6]:

# Loss over the first call's outputs; every label gets a weight of one.
with tf.name_scope('loss'):
    loss_weights = [tf.ones_like(l, dtype=float_type) for l in labels]
    loss = seq2seq.sequence_loss(decode_outputs, labels, loss_weights, vocab_size)
Ejemplo n.º 30
0
    def initialize(self,
                   inputs,
                   input_lengths,
                   mel_targets=None,
                   linear_targets=None):
        '''Builds the inference graph and stores its tensors on the model.

        Sets the "mel_outputs", "linear_outputs" and "alignments" fields, and
        also keeps the four arguments as attributes of the same names.

        Args:
          inputs: int32 Tensor of character IDs with shape [N, T_in], where N
            is the batch size and T_in the number of input steps.
          input_lengths: int32 Tensor with shape [N] holding the length of
            each sequence in inputs.
          mel_targets: float32 Tensor with shape [N, T_out, M] (M is
            num_mels). Only stored here.
          linear_targets: float32 Tensor with shape [N, T_out, F] (F is
            num_freq). Its presence switches `is_training` on.
        '''
        with tf.variable_scope('inference'):
            hp = self._hparams
            is_training = linear_targets is not None
            batch_size = tf.shape(inputs)[0]

            # Character embeddings. The table size is hard-coded
            # (based on the previous length of the symbol list).
            symbols_length = 149
            char_embeddings = tf.get_variable(
                'embedding',
                [symbols_length, hp.embed_depth],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))
            # [N, T_in, embed_depth=256]
            embedded = tf.nn.embedding_lookup(char_embeddings, inputs)

            # Encoder: prenet followed by the CBHG module.
            # [N, T_in, prenet_depths[-1]=128]
            prenet_out = prenet(embedded, is_training, hp.prenet_depths)
            # [N, T_in, encoder_depth=256]
            encoder_out = encoder_cbhg(prenet_out, input_lengths, is_training,
                                       hp.encoder_depth)

            # Attention over the encoder outputs, keeping the alignment
            # history so it can be read back after decoding.
            attention_rnn = GRUCell(hp.attention_depth)
            attention_mechanism = BahdanauAttention(hp.attention_depth,
                                                    encoder_out)
            attention_cell = AttentionWrapper(
                attention_rnn,
                attention_mechanism,
                alignment_history=True,
                output_attention=False)  # [N, T_in, attention_depth=256]

            # The prenet is applied before the concatenation that happens
            # inside AttentionWrapper.
            attention_cell = DecoderPrenetWrapper(attention_cell, is_training,
                                                  hp.prenet_depths)

            # Attention context and RNN output concatenated:
            # [N, T_in, 2*attention_depth=512]
            concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)

            # Decoder stack, bottom to top: projection, then two residual GRUs.
            decoder_layers = [
                OutputProjectionWrapper(concat_cell, hp.decoder_depth)
            ]
            decoder_layers.extend(
                ResidualWrapper(GRUCell(hp.decoder_depth)) for _ in range(2))
            # [N, T_in, decoder_depth=256]
            decoder_cell = MultiRNNCell(decoder_layers, state_is_tuple=True)

            # Each RNN step predicts outputs_per_step mel frames at once.
            frames_width = hp.num_mels * hp.outputs_per_step
            output_cell = OutputProjectionWrapper(decoder_cell, frames_width)
            decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                        dtype=tf.float32)

            # NOTE(review): the test helper is used even when is_training is
            # true -- confirm this variant is meant for inference only.
            helper = TacoTestHelper(batch_size, hp.num_mels,
                                    hp.outputs_per_step)

            decoder = BasicDecoder(output_cell, helper, decoder_init_state)
            decoded, final_decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder, maximum_iterations=hp.max_iters)
            decoder_outputs, _ = decoded  # [N, T_out/r, M*r]

            # One mel frame per entry: [N, T_out, M]
            mel_outputs = tf.reshape(decoder_outputs,
                                     [batch_size, -1, hp.num_mels])

            # Post-processing CBHG: [N, T_out, postnet_depth=256]
            post_outputs = post_cbhg(mel_outputs, hp.num_mels, is_training,
                                     hp.postnet_depth)
            # [N, T_out, F]
            linear_outputs = tf.layers.dense(post_outputs, hp.num_freq)

            # Alignments are read from the first layer's final state.
            alignment_history = final_decoder_state[0].alignment_history
            alignments = tf.transpose(alignment_history.stack(), [1, 2, 0])

            self.inputs = inputs
            self.input_lengths = input_lengths
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets
Ejemplo n.º 31
0
    def initialize(self,
                   inputs,
                   input_lengths,
                   mel_targets=None,
                   linear_targets=None):
        '''Builds the model graph and stores its tensors on the model.

        Sets the "mel_outputs", "linear_outputs" and "alignments" fields, and
        also keeps the four arguments as attributes of the same names.

        Args:
          inputs: Tensor fed directly to the encoder prenet (no embedding
            lookup is done here). Presumably [N, T_in, num_src_freq] source
            frames -- TODO confirm against the caller.
          input_lengths: int32 Tensor with shape [N] holding the length of
            each sequence in inputs.
          mel_targets: float32 Tensor with shape [N, T_out, M] (M is
            num_mels). Only needed for training.
          linear_targets: float32 Tensor with shape [N, T_out, F] (F is
            num_freq). Its presence switches training mode on.
        '''
        with tf.variable_scope('inference'):
            hp = self._hparams
            is_training = linear_targets is not None
            batch_size = tf.shape(inputs)[0]

            # Encoder: prenet sized from the source frequency count, then CBHG.
            in_layer_size = hp.num_src_freq
            # [N, T_in, 128]
            prenet_outputs = prenet(inputs, is_training,
                                    layer_sizes=[in_layer_size, 128])
            # [N, T_in, 256]
            encoder_outputs = encoder_cbhg(prenet_outputs, input_lengths,
                                           is_training)

            # Attention over the encoder outputs, keeping the alignment
            # history so it can be read back after decoding.
            prenet_gru = DecoderPrenetWrapper(GRUCell(256), is_training)
            attention_mechanism = BahdanauAttention(256, encoder_outputs)
            attention_cell = AttentionWrapper(
                prenet_gru,
                attention_mechanism,
                alignment_history=True,
                output_attention=False)  # [N, T_in, 256]

            # Attention context and RNN output concatenated: [N, T_in, 512]
            concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)

            # Decoder stack, bottom to top: projection, then two residual GRUs.
            decoder_layers = [OutputProjectionWrapper(concat_cell, 256)]
            decoder_layers.extend(
                ResidualWrapper(GRUCell(256)) for _ in range(2))
            # [N, T_in, 256]
            decoder_cell = MultiRNNCell(decoder_layers, state_is_tuple=True)

            # Each RNN step predicts outputs_per_step mel frames at once.
            frames_width = hp.num_mels * hp.outputs_per_step
            output_cell = OutputProjectionWrapper(decoder_cell, frames_width)
            decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                        dtype=tf.float32)

            # Training helper when targets are given, test helper otherwise.
            helper = (TacoTrainingHelper(inputs, mel_targets, hp.num_mels,
                                         hp.outputs_per_step)
                      if is_training else
                      TacoTestHelper(batch_size, hp.num_mels,
                                     hp.outputs_per_step))

            decoder = BasicDecoder(output_cell, helper, decoder_init_state)
            decoded, final_decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder, maximum_iterations=hp.max_iters)
            decoder_outputs, _ = decoded  # [N, T_out/r, M*r]

            # One mel frame per entry: [N, T_out, M]
            mel_outputs = tf.reshape(decoder_outputs,
                                     [batch_size, -1, hp.num_mels])

            # Post-processing CBHG: [N, T_out, 256]
            post_outputs = post_cbhg(mel_outputs, hp.num_mels, is_training)
            # [N, T_out, F]
            linear_outputs = tf.layers.dense(post_outputs, hp.num_freq)

            # Alignments are read from the first layer's final state.
            alignment_history = final_decoder_state[0].alignment_history
            alignments = tf.transpose(alignment_history.stack(), [1, 2, 0])

            self.inputs = inputs
            self.input_lengths = input_lengths
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets

            # Report the last dimension of every stage.
            log('Initialized Tacotron model. Dimensions: ')
            log('  input:                   %d' % inputs.shape[-1])
            log('  prenet out:              %d' % prenet_outputs.shape[-1])
            log('  encoder out:             %d' % encoder_outputs.shape[-1])
            log('  attention out:           %d' % attention_cell.output_size)
            log('  concat attn & out:       %d' % concat_cell.output_size)
            log('  decoder cell out:        %d' % decoder_cell.output_size)
            log('  decoder out (%d frames):  %d' %
                (hp.outputs_per_step, decoder_outputs.shape[-1]))
            log('  decoder out (1 frame):   %d' % mel_outputs.shape[-1])
            log('  postnet out:             %d' % post_outputs.shape[-1])
            log('  linear out:              %d' % linear_outputs.shape[-1])
Ejemplo n.º 32
0
    return DropoutWrapper(make_rnn_cell(), input_keep_prob=keep_prob)


# Input and target sequences; the leading (batch) dimension is left open.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

# At every time step we now have an output vector of size 100, but what we
# actually need is a single output value.
# The simplest solution is to wrap the cell in an OutputProjectionWrapper.
# cell = OutputProjectionWrapper(BasicRNNCell(num_units=n_neurous, activation=tf.nn.relu), output_size=n_outputs)

# Use a trick to improve speed: rather than projecting inside the cell,
# flatten all time steps, apply one fully connected layer, and reshape back.
# Plain cells are built first; when training they are replaced by the
# dropout-wrapped variant.
layers = [make_rnn_cell() for _ in range(n_layers)]
if is_training:
    layers = [make_drop_cell() for _ in range(n_layers)]

multi_layer_cell = MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
# Merge the batch and time axes so one dense layer handles every step.
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurous])
# Linear projection (no activation) down to n_outputs values per step.
stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)
# Restore the [batch, n_steps, n_outputs] layout.
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

# Mean squared error between predictions and targets, minimized with Adam.
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# 101 evenly spaced sample points on [0, 15].
X_data = np.linspace(0, 15, 101)

'''
# 应用丢弃机制
Ejemplo n.º 33
0
def _LSTMCells(unit_list, act_fn_list):
    return MultiRNNCell([
        LSTMCell(unit, activation=act_fn)
        for unit, act_fn in zip(unit_list, act_fn_list)
    ])