def _build_decoder_cell(self):
        # Non-beam (training / greedy) decoder cell and initial state
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

        def attn_decoder_input_fn(inputs, attention):
            if not self.attn_input_feeding:
                return inputs
            _input_layer = Dense(self.hidden_units, dtype=self.dtype, name="attn_input_feeding")
            return _input_layer(array_ops.concat([inputs, attention], -1))
        
        # Attention mechanism: 'luong'
        with tf.variable_scope('shared_attention_mechanism'):
            self.attention_mechanism = attention_wrapper.LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)
        # Build decoder cells; attention is applied only on the top layer.
        self.init_decoder_cell_list = [self._build_single_cell() for _ in range(self.depth)]

        self.decoder_cell_list = self.init_decoder_cell_list[:-1] + [attention_wrapper.AttentionWrapper(
            cell=self.init_decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],
            alignment_history=False)]

        batch_size = self.batch_size
        initial_state = [state for state in encoder_last_state]
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(batch_size=batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)
        
        # Beam search: tile encoder outputs, last state and lengths beam_width times
        beam_encoder_outputs = seq2seq.tile_batch(self.encoder_outputs, multiplier=self.beam_width)
        beam_encoder_last_state = nest.map_structure(
            lambda s: seq2seq.tile_batch(s, self.beam_width), self.encoder_last_state)
        beam_encoder_inputs_length = seq2seq.tile_batch(self.encoder_inputs_length, multiplier=self.beam_width)

        with tf.variable_scope('shared_attention_mechanism', reuse=True):
            self.beam_attention_mechanism = attention_wrapper.LuongAttention(
                num_units=self.hidden_units,
                memory=beam_encoder_outputs,
                memory_sequence_length=beam_encoder_inputs_length)

        self.beam_decoder_cell_list = self.init_decoder_cell_list[:-1] + [attention_wrapper.AttentionWrapper(
            cell=self.init_decoder_cell_list[-1],
            attention_mechanism=self.beam_attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=beam_encoder_last_state[-1],
            alignment_history=False)]
            
        beam_batch_size = self.batch_size * self.beam_width
        beam_initial_state = [state for state in beam_encoder_last_state]
        beam_initial_state[-1] = self.beam_decoder_cell_list[-1].zero_state(batch_size=beam_batch_size, dtype=self.dtype)
        beam_decoder_initial_state = tuple(beam_initial_state)
        
        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state, \
               MultiRNNCell(self.beam_decoder_cell_list), beam_decoder_initial_state
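A minimal sketch of how the two cell/state pairs returned above could be consumed, assuming TF 1.x contrib seq2seq; the names `model`, `decoder_inputs_embedded`, `decoder_inputs_length`, `output_layer`, `embedding`, `start_tokens`, `end_token`, `beam_width` and `max_decode_len` are hypothetical, not part of the original model:

# Hedged usage sketch (not part of the original model): the first pair drives a training
# decoder, the second a BeamSearchDecoder. All lowercase names below are hypothetical.
from tensorflow.contrib import seq2seq

decoder_cell, decoder_initial_state, beam_cell, beam_initial_state = model._build_decoder_cell()

# Training decoder: feed ground-truth embeddings with a TrainingHelper.
helper = seq2seq.TrainingHelper(inputs=decoder_inputs_embedded,
                                sequence_length=decoder_inputs_length)
train_decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                     helper=helper,
                                     initial_state=decoder_initial_state,
                                     output_layer=output_layer)
train_outputs, _, _ = seq2seq.dynamic_decode(train_decoder)

# Inference decoder: beam search over the tiled cell and state.
beam_decoder = seq2seq.BeamSearchDecoder(cell=beam_cell,
                                         embedding=embedding,
                                         start_tokens=start_tokens,
                                         end_token=end_token,
                                         initial_state=beam_initial_state,
                                         beam_width=beam_width,
                                         output_layer=output_layer)
beam_outputs, _, _ = seq2seq.dynamic_decode(beam_decoder, maximum_iterations=max_decode_len)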
    def _create_decoder_cell(self):
        enc_outputs, enc_states, enc_seq_len = self.enc_outputs, self.enc_states, self.enc_seq_len
        batch_size = self.batch_size * self.cfg.beam_size if self.use_beam_search else self.batch_size
        with tf.variable_scope("attention"):
            if self.cfg.attention == "luong":  # Luong attention mechanism
                attention_mechanism = LuongAttention(
                    num_units=self.cfg.num_units,
                    memory=enc_outputs,
                    memory_sequence_length=enc_seq_len)
            else:  # default using Bahdanau attention mechanism
                attention_mechanism = BahdanauAttention(
                    num_units=self.cfg.num_units,
                    memory=enc_outputs,
                    memory_sequence_length=enc_seq_len)

        # cell input function to keep the input/output dimensions the same
        # reference: https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/AttentionWrapper
        def cell_input_fn(inputs, attention):
            if not self.cfg.use_attention_input_feeding:
                return inputs
            input_project = tf.layers.Dense(self.cfg.num_units,
                                            dtype=tf.float32,
                                            name='attn_input_feeding')
            return input_project(tf.concat([inputs, attention], axis=-1))

        if self.cfg.top_attention:  # apply attention mechanism only on the top decoder layer
            cells = [
                self._create_rnn_cell() for _ in range(self.cfg.num_layers)
            ]
            cells[-1] = AttentionWrapper(
                cells[-1],
                attention_mechanism=attention_mechanism,
                name="Attention_Wrapper",
                attention_layer_size=self.cfg.num_units,
                initial_cell_state=enc_states[-1],
                cell_input_fn=cell_input_fn)
            initial_state = [state for state in enc_states]
            initial_state[-1] = cells[-1].zero_state(batch_size=batch_size,
                                                     dtype=tf.float32)
            dec_init_states = tuple(initial_state)
            cells = MultiRNNCell(cells)
        else:
            cells = MultiRNNCell(
                [self._create_rnn_cell() for _ in range(self.cfg.num_layers)])
            cells = AttentionWrapper(cells,
                                     attention_mechanism=attention_mechanism,
                                     name="Attention_Wrapper",
                                     attention_layer_size=self.cfg.num_units,
                                     initial_cell_state=enc_states,
                                     cell_input_fn=cell_input_fn)
            dec_init_states = cells.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=enc_states)
        return cells, dec_init_states
def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0,
                    activation=None):
    def single_cell(num_units):
        if cell_type == 'rnn':
            cell_class = BasicRNNCell
        elif cell_type == 'lstm':
            cell_class = LSTMCell
        elif cell_type == 'gru':
            cell_class = GRUCell
        else:
            raise ValueError('Cell Type Not Supported! ')

        if activation is not None:
            if activation == 'relu':
                activation_f = tf.nn.relu
            elif activation == 'sigmoid':
                activation_f = tf.sigmoid
            elif activation == 'elu':
                activation_f = tf.nn.elu
            else:
                raise ValueError('Activation Function Not Supported! ')
        else:
            activation_f = None

        if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
            return DropoutWrapper(cell_class(num_units=num_units,
                                             activation=activation_f),
                                  input_keep_prob=dp_input_keep_prob,
                                  output_keep_prob=dp_output_keep_prob)
        else:
            return cell_class(num_units=num_units, activation=activation_f)

    if isinstance(num_units, list):
        num_layers = len(num_units)
        if num_layers > 1:
            return MultiRNNCell(
                [single_cell(num_units[i]) for i in range(num_layers)])
        else:
            return single_cell(num_units[0])
    else:
        if num_layers > 1:
            return MultiRNNCell(
                [single_cell(num_units) for _ in range(num_layers)])
        else:
            return single_cell(num_units)
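A short usage sketch of the `create_rnn_cell` helper above; the cell types and sizes are illustrative only, not taken from any particular model:

# Hedged usage sketch: per-layer sizes via a list, or a uniform size with num_layers.
enc_cell = create_rnn_cell('lstm', num_units=[256, 128],          # 2 layers: 256 then 128 units
                           dp_output_keep_prob=0.8, activation='relu')
dec_cell = create_rnn_cell('gru', num_units=512, num_layers=3)    # 3 identical GRU layers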
Example #4
 def _build_pre(self):
     self.dimA = 20
     self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])  # one cell object per layer
     self.b1 = 0.95
     self.b2 = 0.95
     self.lr = 0.1
     self.eps = 1e-8
Example #5
    def impress(self, state_code, pre_impress_states):
        # LSTM, 3 layers
        self.impress_lay_num = 3
        with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):
            def loop_fn(time, cell_output, cell_state, loop_state):
                if cell_output is None:  # time == 0
                    # initialization
                    input = state_code
                    state = state_  # state_ is assigned below, before raw_rnn invokes loop_fn
                    emit_output = None
                    loop_state = None
                else:
                    input = cell_output
                    emit_output = cell_output
                    state = cell_state
                    loop_state = None
                    
                elements_finished = (time >= 1)
                return (elements_finished, input, state, emit_output, loop_state)

            multirnn_cell = MultiRNNCell([LSTMCell(self.impress_dim) 
                    for _ in range(self.impress_lay_num)],  state_is_tuple=True) 
            
            if pre_impress_states is None:
                state_ = multirnn_cell.zero_state(self.batch_size, tf.float32)
            else:
                state_ = pre_impress_states
    
            emit_ta, states, final_loop_state = tf.nn.raw_rnn(multirnn_cell, loop_fn)
            state_impress_code = tf.transpose(emit_ta.stack(), [1, 0, 2])[0] # transpose for putting batch dimension to first dimension
            
            return state_impress_code, final_loop_state
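Both this example and Example #12 below rely on `tf.nn.raw_rnn`, whose `loop_fn` contract is easy to get wrong. A minimal, self-contained sketch of that contract, assuming TF 1.x; the batch size, dimensions and feedback projection are illustrative only and not taken from the code above:

# Hedged raw_rnn sketch: all sizes and the feedback projection are made up.
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell

batch_size, input_dim, num_units, max_steps = 4, 8, 16, 5
first_input = tf.zeros([batch_size, input_dim])
lengths = tf.fill([batch_size], max_steps)
cell = LSTMCell(num_units)
proj_w = tf.get_variable("proj_w", [num_units, input_dim])   # feeds each output back as the next input
proj_b = tf.get_variable("proj_b", [input_dim])

def loop_fn(time, cell_output, cell_state, loop_state):
    if cell_output is None:        # time == 0: supply the initial input and state
        next_input = first_input
        next_state = cell.zero_state(batch_size, tf.float32)
        emit_output = None         # raw_rnn infers the emit structure from the cell
    else:                          # later steps: feed the previous output back in
        next_input = tf.nn.xw_plus_b(cell_output, proj_w, proj_b)
        next_state = cell_state
        emit_output = cell_output
    elements_finished = (time >= lengths)   # [batch_size] bool vector
    return elements_finished, next_input, next_state, emit_output, None

emit_ta, final_state, _ = tf.nn.raw_rnn(cell, loop_fn)
outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])   # [batch, time, num_units]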
Example #6
    def build_decoder_cell(self):

        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

        if self.use_beamsearch_decode:
            print ("use beamsearch decoding..")
            encoder_outputs = seq2seq.tile_batch(
                self.encoder_outputs, multiplier=self.beam_width)
            encoder_last_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.beam_width), self.encoder_last_state)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)

        # Building attention mechanism: Default Bahdanau
        # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
        self.attention_mechanism = attention_wrapper.BahdanauAttention(
            num_units=self.hidden_units, memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length,) 
        # 'Luong' style attention: https://arxiv.org/abs/1508.04025
        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = attention_wrapper.LuongAttention(
                num_units=self.hidden_units, memory=encoder_outputs, 
                memory_sequence_length=encoder_inputs_length,)
 
        # Building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell() for i in range(self.depth)]
        decoder_initial_state = encoder_last_state

        def attn_decoder_input_fn(inputs, attention):
            if not self.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            _input_layer = Dense(self.hidden_units, dtype=self.dtype,
                                 name='attn_input_feeding')
            return _input_layer(array_ops.concat([inputs, attention], -1))

        # AttentionWrapper wraps RNNCell with the attention_mechanism
        # Note: We implement Attention mechanism only on the top decoder layer
        self.decoder_cell_list[-1] = attention_wrapper.AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],
            alignment_history=False,
            name='Attention_Wrapper')

        batch_size = self.batch_size if not self.use_beamsearch_decode \
                     else self.batch_size * self.beam_width
        initial_state = [state for state in encoder_last_state]

        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
          batch_size=batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)

        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state
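The tiling above is easy to misread; a small hedged illustration of what `seq2seq.tile_batch` does, with made-up values:

# Hedged illustration: each batch entry is repeated beam_width times in place,
# which is the layout BeamSearchDecoder expects for attention memory and lengths.
mem = tf.constant([[1., 2., 3.],
                   [4., 5., 6.]])                      # [batch=2, time=3]
tiled = seq2seq.tile_batch(mem, multiplier=2)          # [4, 3]: rows [1 2 3], [1 2 3], [4 5 6], [4 5 6]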
Example #7
def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0):
    def single_cell(num_units):
        if cell_type == 'rnn':
            cell_class = BasicRNNCell
        elif cell_type == 'gru':
            cell_class = GRUCell
        elif cell_type == 'lstm':
            cell_class = LSTMCell
        else:
            raise ValueError('Cell Type Not Supported! ')

        if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
            return DropoutWrapper(cell_class(num_units=num_units),
                                  input_keep_prob=dp_input_keep_prob,
                                  output_keep_prob=dp_output_keep_prob)
        else:
            return cell_class(num_units=num_units)

    assert (len(num_units) == num_layers)
    if num_layers > 1:
        return MultiRNNCell(
            [single_cell(num_units[i]) for i in range(num_layers)])
    else:
        return single_cell(num_units[0])
Example #8
    def Encoder(self, xs):
        encoder_input = tf.one_hot(tf.cast(xs, tf.int32), self.val_size_x) 
    
        encoder_input = self.WordEmb(encoder_input)
        
        if self.args.train:
            inputs_length = self.inputs_length_PH
        elif self.args.test:
            inputs_length = self.inputs_length_test_PH
            
        multirnn_cell = MultiRNNCell([LSTMCell(self.encoder_units) 
            for _ in range(self.encoder_lay_Num)],  state_is_tuple=True)
            
        (fw_outputs, bw_outputs), (fw_final_state, bw_final_state) = (
            tf.nn.bidirectional_dynamic_rnn(cell_fw=multirnn_cell, 
                                            cell_bw=multirnn_cell, inputs=encoder_input,
                                            sequence_length=inputs_length, dtype=self.dtype))
                                            
        sentence_code = tf.concat((fw_outputs, bw_outputs), axis = 2)
      
        sentence_code_ = []
        for i in range(self.batch_size):
            sentence_code_.append(sentence_code[i,inputs_length[i]-1,:])
        
        encoder_output = tf.stack(sentence_code_)
        
        encoder_output = tf.layers.dense(inputs=encoder_output, units=self.encoder_units, activation=tf.nn.relu)
        

  
        return encoder_output
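The Python loop above gathers the last valid timestep per example and only works when `batch_size` is known statically. A hedged, vectorized alternative using `tf.gather_nd`, assuming `inputs_length` is an int32 tensor:

# Hedged sketch: select sentence_code[i, inputs_length[i] - 1, :] for every i without a Python loop.
batch_range = tf.range(tf.shape(sentence_code)[0])                 # [0, 1, ..., batch-1]
gather_idx = tf.stack([batch_range, inputs_length - 1], axis=1)    # [[0, len0-1], [1, len1-1], ...]
encoder_output = tf.gather_nd(sentence_code, gather_idx)           # [batch, 2 * encoder_units]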
Example #9
 def _build_pre(self, size):
     self.dimA = size
     self.num_of_layers = 2
     self.cellA = MultiRNNCell([LSTMCell(num_units=self.dimA) for _ in range(self.num_of_layers)])
     self.b1 = 0.95
     self.b2 = 0.95
     self.lr = 0.1
     self.eps = 1e-8
Example #10
 def _create_rnn_cell(self):
     if self.cfg["num_layers"] is None or self.cfg["num_layers"] <= 1:
         return self._create_single_rnn_cell(self.cfg["num_units"])
     else:
         return MultiRNNCell([
             self._create_single_rnn_cell(self.cfg["num_units"])
             for _ in range(self.cfg["num_layers"])
         ])
Example #11
    def build_dec_cell(self, hidden_size):
        enc_outputs = self.enc_outputs
        enc_last_state = self.enc_last_state
        enc_inputs_length = self.enc_inp_len

        if self.use_beam_search:
            self.logger.info("using beam search decoding")
            enc_outputs = seq2seq.tile_batch(self.enc_outputs,
                                             multiplier=self.p.beam_width)
            enc_last_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.p.beam_width),
                self.enc_last_state)
            enc_inputs_length = seq2seq.tile_batch(self.enc_inp_len,
                                                   self.p.beam_width)

        if self.p.attention_type.lower() == 'luong':
            self.attention_mechanism = attention_wrapper.LuongAttention(
                num_units=hidden_size,
                memory=enc_outputs,
                memory_sequence_length=enc_inputs_length)
        else:
            self.attention_mechanism = attention_wrapper.BahdanauAttention(
                num_units=hidden_size,
                memory=enc_outputs,
                memory_sequence_length=enc_inputs_length)

        def attn_dec_input_fn(inputs, attention):
            if not self.p.attn_input_feeding:
                return inputs
            else:
                _input_layer = Dense(hidden_size,
                                     dtype=self.p.dtype,
                                     name='attn_input_feeding')
                return _input_layer(tf.concat([inputs, attention], -1))

        self.dec_cell_list = [
            self.build_single_cell(hidden_size) for _ in range(self.p.depth)
        ]

        if self.p.use_attn:
            self.dec_cell_list[-1] = attention_wrapper.AttentionWrapper(
                cell=self.dec_cell_list[-1],
                attention_mechanism=self.attention_mechanism,
                attention_layer_size=hidden_size,
                cell_input_fn=attn_dec_input_fn,
                initial_cell_state=enc_last_state[-1],
                alignment_history=False,
                name='attention_wrapper')

        batch_size = self.p.batch_size if not self.use_beam_search else self.p.batch_size * self.p.beam_width
        initial_state = [state for state in enc_last_state]
        if self.p.use_attn:
            initial_state[-1] = self.dec_cell_list[-1].zero_state(
                batch_size=batch_size, dtype=self.p.dtype)
        dec_initial_state = tuple(initial_state)

        return MultiRNNCell(self.dec_cell_list), dec_initial_state
Example #12
    def Decoder(self, encoder_output):
        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:  #time = 0
                # initialization
                input = tf.concat((encoder_output, encoder_output), axis=1)
                state = (multirnn_cell.zero_state(self.batch_size, tf.float32))
                emit_output = None
                loop_state = None
                elements_finished = False
            else:
                emit_output = cell_output
                if self.args.test:
                    #decoder_units to val_size
                    transformed_output = tf.nn.xw_plus_b(
                        cell_output, self.decoder_W,
                        self.decoder_b)  #decoder_units to vac_size
                    #argmax
                    transformed_output = tf.argmax(transformed_output, 1)
                    transformed_output = tf.one_hot(transformed_output,
                                                    self.val_size,
                                                    on_value=1.0,
                                                    off_value=0.0,
                                                    axis=-1)
                    #val_size to decoder_units//2
                    transformed_output = self.WordEmb(transformed_output)
                elif self.args.train:
                    ys_onehot = tf.one_hot(self.ys_PH[:, (time - 1)],
                                           self.val_size,
                                           on_value=1.0,
                                           off_value=0.0,
                                           axis=-1)
                    transformed_output = self.WordEmb(ys_onehot)

                input = tf.concat([transformed_output, encoder_output], axis=1)
                state = cell_state
                loop_state = None
            elements_finished = (time >= self.max_len)
            return (elements_finished, input, state, emit_output, loop_state)

        multirnn_cell = MultiRNNCell(
            [LSTMCell(self.decoder_units) for _ in range(self.lay_num)],
            state_is_tuple=True)
        emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(
            multirnn_cell, loop_fn)

        # transpose for putting batch dimension to first dimension
        outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])

        #transform decoder_units to val_size
        decoder_output_flat = tf.reshape(outputs, [-1, self.decoder_units])
        decoder_output_transform_flat = tf.nn.xw_plus_b(
            decoder_output_flat, self.decoder_W, self.decoder_b)
        decoder_logits = tf.reshape(
            decoder_output_transform_flat,
            (self.batch_size, self.max_len, self.val_size))

        return decoder_logits
Example #13
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
  cell = []
  for i in range(depth):
    if i == 0:
      cell.append(LSTMCell(rank, state_is_tuple=True))
    else:
      cell.append(ResidualWrapper(LSTMCell(rank, state_is_tuple=True)))
  # first layer starts from LSTMStateTuple(c=0, h=u_emb); deeper layers start from zeros
  initial_state = LSTMStateTuple(tf.zeros_like(u_emb), u_emb)
  initial_state = [initial_state, ]
  for i in range(1, depth):
    initial_state.append(cell[i].zero_state(batch_size, tf.float32))
  return MultiRNNCell(cell), tuple(initial_state)
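A hedged usage sketch of `build_decoder_cell` above; `u_emb` would normally come from an embedding lookup, and `decoder_inputs` is a hypothetical `[batch, time, rank]` tensor:

# Hedged sketch: the first LSTM layer starts from LSTMStateTuple(c=0, h=u_emb);
# the deeper residual layers start from zero states. decoder_inputs is hypothetical.
rank, batch_size = 64, 32
u_emb = tf.zeros([batch_size, rank])                   # stand-in for a real embedding lookup
cell, init_state = build_decoder_cell(rank, u_emb, batch_size, depth=2)
outputs, final_state = tf.nn.dynamic_rnn(cell, decoder_inputs,
                                          initial_state=init_state,
                                          dtype=tf.float32)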
Example #14
def stacked_rnn_step(input_vocabulary_size,
                     hidden_size=13,
                     emb_dim=11,
                     n_layers=2,
                     variable_scope='encdec'):
    with tf.variable_scope(variable_scope, reuse=None):
        # stacked LSTM: one cell object per layer (a repeated single object would tie the layers' weights)
        rnn_cell = MultiRNNCell(
            [LSTMCell(hidden_size) for _ in range(n_layers)])
        proj_wrapper = InputProjectionWrapper(rnn_cell, emb_dim)
    embedding_wrapper = EmbeddingWrapper(proj_wrapper, input_vocabulary_size,
                                         emb_dim)
    return embedding_wrapper
Example #15
    def createGraph(self):

        self.input = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='inputs')
        self.targs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='targets')
        onehot = tf.one_hot(self.input, self.vocab_size, name='input_oh')

        inputs = tf.split(onehot, self.seq_len, 1)
        inputs = [tf.squeeze(i, [1]) for i in inputs]
        targets = tf.split(self.targs, self.seq_len, 1)

        with tf.variable_scope("posRNN"):

            cells = [GRUCell(self.num_hidden) for _ in range(self.num_layers)]

            stacked = MultiRNNCell(cells, state_is_tuple=True)
            self.zero_state = stacked.zero_state(self.batch_size, tf.float32)

            outputs, self.last_state = seq2seq.rnn_decoder(
                inputs, self.zero_state, stacked)

            w = tf.get_variable(
                "w", [self.num_hidden, self.vocab_size],
                tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.02))
            b = tf.get_variable("b", [self.vocab_size],
                                initializer=tf.constant_initializer(0.0))
            logits = [tf.matmul(o, w) + b for o in outputs]

            const_weights = [
                tf.ones([self.batch_size]) for _ in range(self.seq_len)
            ]
            self.loss = seq2seq.sequence_loss(logits, targets, const_weights)

            self.opt = tf.train.AdamOptimizer(0.001,
                                              beta1=0.5).minimize(self.loss)

        with tf.variable_scope("posRNN", reuse=True):

            batch_size = 1
            self.s_inputs = tf.placeholder(tf.int32, [batch_size],
                                           name='s_inputs')
            s_onehot = tf.one_hot(self.s_inputs,
                                  self.vocab_size,
                                  name='s_input_oh')

            self.s_zero_state = stacked.zero_state(batch_size, tf.float32)
            s_outputs, self.s_last_state = seq2seq.rnn_decoder(
                [s_onehot], self.s_zero_state, stacked)
            s_outputs = tf.reshape(s_outputs, [1, self.num_hidden])
            self.s_probs = tf.nn.softmax(tf.matmul(s_outputs, w) + b)
Example #16
    def model(data, weights, biases):
        # One LSTMCell instance per layer (repeating a single cell object would tie the layers' weights)
        cell = MultiRNNCell([LSTMCell(NUM_NEURONS) for _ in range(NUM_LAYERS)])

        output, _ = tf.nn.rnn(cell, train_data_node, dtype=DATA_TYPE)
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        out_size = int(train_labels_node.get_shape()[1])

        prediction = tf.nn.softmax(
            tf.matmul(last, weights['out']) + biases['out'])
        # cross_entropy = -tf.reduce_sum(train_labels_node * tf.log(prediction))
        return prediction
Example #17
 def prediction(self):
     # Recurrent network: one GRU + dropout wrapper per layer
     # (repeating a single cell object would tie the layers' weights).
     network = MultiRNNCell([
         DropoutWrapper(GRUCell(self._num_hidden), output_keep_prob=self.dropout)
         for _ in range(self._num_layers)
     ])
     output, _ = tf.nn.dynamic_rnn(network, data, dtype=tf.float32)
     # Select last output.
     output = tf.transpose(output, [1, 0, 2])
     last = tf.gather(output, int(output.get_shape()[0]) - 1)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Example #18
    def build_cell(self, hidden_units, depth=1):
        '''Create forward and reverse RNNCell networks.

            Args:
              hidden_units: Units of RNNCell.
              depth: The number of RNNCell layers.

            Returns:
              An RNNCell instance (MultiRNNCell).
            '''
        cell_lists = [
            self.build_single_cell(hidden_units) for i in range(depth)
        ]
        return MultiRNNCell(cell_lists)
Example #19
    def build_decoder_cell(self):
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length
        # building attention mechanism: default Bahdanau
        # 'Bahdanau': https://arxiv.org/abs/1409.0473
        self.attention_mechanism = attention_wrapper.BahdanauAttention(
            num_units=self.hidden_size,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)
        # 'Luong': https://arxiv.org/abs/1508.04025
        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = attention_wrapper.LuongAttention(
                num_units=self.hidden_size,
                memory=self.encoder_outputs,
                memory_sequence_length=self.encoder_inputs_length)

        # building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell() for _ in range(self.layer_num)
        ]

        def att_decoder_input_fn(inputs, attention):
            if not self.use_att_decoding:
                return inputs

            _input_layer = Dense(self.hidden_size,
                                 dtype=self.dtype,
                                 name='att_input_feeding')
            return _input_layer(array_ops.concat([inputs, attention], axis=-1))

        # AttentionWrapper wraps RNNCell with the attention_mechanism
        # implement attention mechanism only on the top of decoder layer
        self.decoder_cell_list[-1] = attention_wrapper.AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_size,
            cell_input_fn=att_decoder_input_fn,
            initial_cell_state=encoder_last_state[
                -1],  # last hidden state of last encode layer
            alignment_history=False,
            name='Attention_Wrapper')
        initial_state = [state for state in encoder_last_state]
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=self.batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)
        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state
Example #20
 def gru_net_initial(self,
                     hidden_units,
                     input_data,
                     initial_state,
                     input_length,
                     depth=1):
     cell_lists = [
         self.build_single_cell(hidden_units) for i in range(depth)
     ]
     multi_cell = MultiRNNCell(cell_lists, state_is_tuple=False)
     input_length = tf.reshape(input_length, [-1])
     output, state = tf.nn.dynamic_rnn(multi_cell,
                                       input_data,
                                       sequence_length=input_length,
                                       initial_state=initial_state,
                                       dtype=tf.float32)
     return output
Example #21
def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0,
                    residual_connections=False):
    """
	TODO: MOVE THIS properly to utils. Write doc
	:param cell_type:
	:param num_units:
	:param num_layers:
	:param dp_input_keep_prob:
	:param dp_output_keep_prob:
	:param residual_connections:
	:return:
	"""
    def single_cell(num_units):
        if cell_type == "lstm":
            cell_class = LSTMCell
        elif cell_type == "gru":
            cell_class = GRUCell

        if residual_connections:
            if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
                return DropoutWrapper(ResidualWrapper(
                    cell_class(num_units=num_units)),
                                      input_keep_prob=dp_input_keep_prob,
                                      output_keep_prob=dp_output_keep_prob)
            else:
                return ResidualWrapper(cell_class(num_units=num_units))
        else:
            if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
                return DropoutWrapper(cell_class(num_units=num_units),
                                      input_keep_prob=dp_input_keep_prob,
                                      output_keep_prob=dp_output_keep_prob)
            else:
                return cell_class(num_units=num_units)

    if num_layers > 1:
        return MultiRNNCell(
            [single_cell(num_units) for _ in range(num_layers)])
    else:
        return single_cell(num_units)
Example #22
    def build_graph(self):
        with tf.variable_scope('lstm'):
            # One LSTMCell per layer (repeating a single cell object would tie the layers' weights)
            rnn_cell = MultiRNNCell(
                [LSTMCell(self.layer_size) for _ in range(self.layers)])
            cell_output, self.init_state = rnn_cell(self.model_input,
                                                    self.init_state)
            print("%i layers created" % self.layers)
            self.output_layer = self.__add_output_layer(
                "fc_out", cell_output, self.layer_size, self.output_dim)

            self.output_layer = tf.Print(
                self.output_layer,
                [self.output_layer,
                 tf.convert_to_tensor(self.ground_truth)],
                'Value of output layer and ground truth:',
                summarize=6)

            tf.summary.histogram('lstm_output', self.output_layer)

            return self.output_layer
Example #23
 def _create_rnn_cell(self):
     if self.cfg["num_layers"] is None or self.cfg["num_layers"] <= 1:
         return self._create_single_rnn_cell(self.cfg["num_units"])
     else:
         if self.cfg["use_stack_rnn"]:
             lstm_cells = []
             for i in range(self.cfg["num_layers"]):
                 cell = tf.nn.rnn_cell.LSTMCell(
                     self.cfg["num_units"],
                     initializer=tf.initializers.orthogonal)
                 cell = tf.contrib.rnn.DropoutWrapper(
                     cell,
                     state_keep_prob=self.keep_prob,
                     input_keep_prob=self.keep_prob,
                     dtype=tf.float32)
                 lstm_cells.append(cell)
             return lstm_cells
         else:
             return MultiRNNCell([
                 self._create_single_rnn_cell(self.cfg["num_units"])
                 for _ in range(self.cfg["num_layers"])
             ])
Example #24
    def build_decoder_cell(self):
        # No beam search currently

        # Attention
        # TODO: other attention mechanism?
        attention_mechanism = BahdanauAttention(
            num_units=self.config.hidden_units,
            memory=self.encoder_outputs,
            memory_sequence_length=self.encoder_inputs_length)

        # One LSTMCell per decoder layer (repeating a single cell object would share its weights across layers)
        decoder_cells = [LSTMCell(self.config.hidden_units)
                         for _ in range(self.config.decoder_depth)]
        decoder_initial_state = list(self.encoder_last_state)

        def attn_decoder_input_fn(inputs, attention):
            if not self.config.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            _input_layer = Dense(self.config.hidden_units,
                                 dtype=tf.float32,
                                 name='attn_input_feeding')
            return _input_layer(concat([inputs, attention], -1))

        # Add an AttentionWrapper on the last (top) decoder layer
        decoder_cells[-1] = AttentionWrapper(
            cell=decoder_cells[-1],
            attention_mechanism=attention_mechanism,
            attention_layer_size=self.config.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=decoder_initial_state[-1],
            alignment_history=False,
            name='Attention_Wrapper')

        decoder_initial_state[-1] = decoder_cells[-1].zero_state(
            batch_size=self.batch_size, dtype=tf.float32)
        decoder_initial_state = tuple(decoder_initial_state)

        return MultiRNNCell(decoder_cells), decoder_initial_state
Example #25
    def build_decoder_cell(self):

        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

        # To use BeamSearchDecoder, encoder_outputs, encoder_last_state and
        # encoder_inputs_length need to be tiled so that:
        # [batch_size, ..., ...] -> [batch_size * beam_width, ..., ...]
        if self.use_beamsearch_decode:
            print("use beamsearch decoding..")
            encoder_outputs = seq2seq.tile_batch(self.encoder_outputs,
                                                 multiplier=self.beam_width)
            encoder_last_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.beam_width),
                self.encoder_last_state)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)

        # Building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell() for i in range(self.depth)
        ]
        # ADD GPU SUPPORT FOR DISTRIBUTION
        decoder_initial_state = encoder_last_state

        # Also, if beam search decoding is used, the batch_size argument of
        # .zero_state should be beam_width times the original batch_size
        batch_size = self.batch_size if not self.use_beamsearch_decode \
                     else self.batch_size * self.beam_width
        initial_state = [state for state in encoder_last_state]

        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)

        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state
Example #26
def create_cudnn_LSTM_cell(num_units,
                           input_size,
                           num_layers=1,
                           dp_input_keep_prob=1.0,
                           dp_output_keep_prob=1.0):
    def single_cell(name):
        with tf.variable_scope(name):
            if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
                return DropoutWrapper(cudnn_LSTMCell(
                    num_units=num_units,
                    input_size=input_size,
                    direction='unidirectional'),
                                      input_keep_prob=dp_input_keep_prob,
                                      output_keep_prob=dp_output_keep_prob)
            else:
                return cudnn_LSTMCell(num_units=num_units,
                                      input_size=input_size,
                                      direction='unidirectional')

    if num_layers > 1:
        return MultiRNNCell(
            [single_cell('layer_%d' % i) for i in range(num_layers)])
    else:
        return single_cell('layer_0')
Example #27
    def build_encoder_cell(self):

        return MultiRNNCell(
            [self.build_single_cell() for i in range(self.depth)])
Example #28
    def __init__(self, model_parameters, training_parameters, directories,
                 **kwargs):
        """ Initialization of the RNN Model as TensorFlow computational graph
    """

        self.model_parameters = model_parameters
        self.training_parameters = training_parameters
        self.directories = directories

        # Define model hyperparameters Tensors
        with tf.name_scope("Parameters"):
            self.learning_rate = tf.placeholder(tf.float32,
                                                name="learning_rate")
            self.momentum = tf.placeholder(tf.float32, name="momentum")
            self.input_keep_probability = tf.placeholder(
                tf.float32, name="input_keep_probability")
            self.output_keep_probability = tf.placeholder(
                tf.float32, name="output_keep_probability")

            self.is_training = tf.placeholder(tf.bool)

        # Define input, output and initialization Tensors
        with tf.name_scope("Input"):
            self.inputs = tf.placeholder("float", [
                None, self.model_parameters.sequence_length,
                self.model_parameters.input_dimension
            ],
                                         name='input_placeholder')

            self.targets = tf.placeholder(
                "float", [None, self.model_parameters.sequence_length, 1],
                name='labels_placeholder')

            self.init = tf.placeholder(
                tf.float32,
                shape=[None, self.model_parameters.state_size],
                name="init")

        # Define the TensorFlow RNN computational graph
        with tf.name_scope("LSTMRNN_RNN"):
            cells = []

            # Define the layers
            for _ in range(self.model_parameters.n_layers):
                if self.model_parameters.model == 'rnn':
                    cell = BasicRNNCell(self.model_parameters.state_size)
                elif self.model_parameters.model == 'gru':
                    cell = GRUCell(self.model_parameters.state_size)
                elif self.model_parameters.model == 'lstm':
                    cell = BasicLSTMCell(self.model_parameters.state_size,
                                         state_is_tuple=True)
                elif self.model_parameters.model == 'nas':
                    cell = NASCell(self.model_parameters.state_size)
                else:
                    raise Exception("model type not supported: {}".format(
                        self.model_parameters.model))

                if (self.model_parameters.output_keep_probability < 1.0
                        or self.model_parameters.input_keep_probability < 1.0):

                    if self.model_parameters.output_keep_probability < 1.0:
                        cell = DropoutWrapper(
                            cell,
                            output_keep_prob=self.output_keep_probability)

                    if self.model_parameters.input_keep_probability < 1.0:
                        cell = DropoutWrapper(
                            cell, input_keep_prob=self.input_keep_probability)

                cells.append(cell)
            cell = MultiRNNCell(cells)

            # Simulate time steps and get RNN cell output
            self.outputs, self.next_state = tf.nn.dynamic_rnn(cell,
                                                              self.inputs,
                                                              dtype=tf.float32)

        # Define cost Tensors
        with tf.name_scope("LSTMRNN_Cost"):

            # Flatten to apply same weights to all time steps
            self.flattened_outputs = tf.reshape(
                self.outputs, [-1, self.model_parameters.state_size],
                name="flattened_outputs")

            self.output_w = tf.Variable(tf.truncated_normal(
                [self.model_parameters.state_size, 1], stddev=0.01),
                                        name="output_weights")

            self.variable_summaries(self.output_w, 'output_weights')

            self.output_b = tf.Variable(tf.constant(0.1), name="output_biases")

            self.variable_summaries(self.output_b, 'output_biases')

            # Define decision threshold Tensor
            self.decision_threshold = tf.Variable(
                self.model_parameters.threshold, name="decision_threshold")

            # Define moving average step Tensor
            self.ma_step = tf.Variable(self.model_parameters.ma_step,
                                       name="ma_step")

            # Softmax activation layer, using RNN inner loop last output
            # logits and labels must have the same shape [batch_size, num_classes]

            self.logits = tf.add(tf.matmul(self.flattened_outputs,
                                           self.output_w),
                                 self.output_b,
                                 name="logits")

            self.logits_bn = self.batch_norm_wrapper(
                inputs=self.logits, is_training=self.is_training)

            tf.summary.histogram('logits', self.logits)
            tf.summary.histogram('logits_bn', self.logits_bn)

            self.predictions = tf.reshape(
                self.logits, [-1, self.model_parameters.sequence_length, 1],
                name="predictions")

            self.shaped_predictions = tf.reshape(self.predictions, [-1],
                                                 name="shaped_predictions")

            self.tmp_smoothed_predictions = tf.concat(
                [
                    self.shaped_predictions,
                    tf.fill(
                        tf.expand_dims(self.ma_step - 1, 0),
                        self.shaped_predictions[
                            tf.shape(self.shaped_predictions)[0] - 1])
                ],
                axis=0,
                name="tmp_smoothed_predictions")

            self.ma_loop_idx = tf.constant(0, dtype='int32')
            self.shaped_smoothed_predictions = tf.zeros([0], dtype='float32')

            _, self.shaped_smoothed_predictions = tf.while_loop(
                lambda i, _: i < tf.shape(self.shaped_predictions)[0],
                self.ma_while_body,
                [self.ma_loop_idx, self.shaped_smoothed_predictions],
                shape_invariants=[tf.TensorShape([]),
                                  tf.TensorShape([None])])

            self.smoothed_predictions = tf.reshape(
                self.shaped_smoothed_predictions,
                [-1, self.model_parameters.sequence_length, 1],
                name="smoothed_predictions")

            self.soft_predictions_summary = tf.summary.tensor_summary(
                "soft_predictions", self.smoothed_predictions)
            # self.soft_predictions_summary = tf.summary.tensor_summary("soft_predictions", self.predictions)

            # self.shaped_logits = tf.reshape(self.logits,
            #   [-1, self.model_parameters.sequence_length, 1],
            #   name="shaped_logits")

            # Cross-Entropy
            # self.cost = tf.reduce_mean(-tf.reduce_sum(
            #   self.targets * tf.log(self.predictions),
            #   reduction_indices=[2]), name="cross_entropy")

            # self.cross_entropy = tf.reduce_mean(
            #   tf.nn.sigmoid_cross_entropy_with_logits(_sentinel=None,
            #     labels=self.targets,
            #     logits=self.predictions),
            #   name="cross_entropy")

            # self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            #   _sentinel=None,
            #   labels=self.targets,
            #   logits=self.shaped_logits,
            #   name="cross_entropy")

            # Root Mean Squared Error
            # self.mean_squared_error = tf.losses.mean_squared_error(
            #   labels=self.targets,
            #   predictions=self.predictions)

            self.cost = tf.sqrt(
                tf.reduce_mean(
                    tf.squared_difference(self.smoothed_predictions,
                                          self.targets)))

            # self.cost = tf.sqrt(tf.reduce_mean(
            #   tf.squared_difference(
            #     self.predictions, self.targets)))

            tf.summary.scalar('training_cost', self.cost)

            # self.cost = tf.reduce_mean(
            #   self.cross_entropy,
            #   name="cost")

            voicing_condition = tf.greater(
                self.smoothed_predictions,
                tf.fill(tf.shape(self.smoothed_predictions),
                        self.decision_threshold),
                name="thresholding")

            # voicing_condition = tf.greater(self.predictions,
            #   tf.fill(tf.shape(self.predictions), self.decision_threshold),
            #   name="thresholding")

            self.label_predictions = tf.where(
                voicing_condition,
                tf.ones_like(self.smoothed_predictions),
                tf.zeros_like(self.smoothed_predictions),
                name="label_predictions")

            # self.label_predictions = tf.where(voicing_condition,
            #   tf.ones_like(self.predictions) ,
            #   tf.zeros_like(self.predictions),
            #   name="label_predictions")

            self.hard_predictions_summary = tf.summary.tensor_summary(
                "hard_predictions", self.label_predictions)

            self.correct_prediction = tf.equal(self.label_predictions,
                                               self.targets,
                                               name="correct_predictions")

            self.r = tf.reshape(self.targets, [-1])
            self.h = tf.reshape(self.label_predictions, [-1])

            # Defined outside the while loop to avoid problems
            self.dump_one = tf.constant(1, dtype=tf.int32, shape=[])

            self.temp_pk_miss = tf.Variable([0], tf.int32, name='temp_pk_miss')
            self.temp_pk_falsealarm = tf.Variable([0],
                                                  tf.int32,
                                                  name='temp_pk_falsealarm')
            self.loop_idx = tf.constant(0, dtype=tf.int32, name='loop_idx')

            self.loop_vars = self.loop_idx, self.temp_pk_miss, self.temp_pk_falsealarm

            _, self.all_temp_pk_miss, self.all_temp_pk_falsealarm = tf.while_loop(
                self.while_condition,
                self.while_body,
                self.loop_vars,
                shape_invariants=(self.loop_idx.get_shape(),
                                  tf.TensorShape([None]),
                                  tf.TensorShape([None])))

            self.pk_miss = tf.reduce_mean(
                tf.cast(self.all_temp_pk_miss, tf.float32))
            tf.summary.scalar('p_miss', self.pk_miss)

            self.pk_falsealarm = tf.reduce_mean(
                tf.cast(self.all_temp_pk_falsealarm, tf.float32))
            tf.summary.scalar('p_falsealarm', self.pk_falsealarm)

            self.pk = tf.reduce_mean(tf.cast(
                tf.add(self.all_temp_pk_miss, self.all_temp_pk_falsealarm),
                tf.float32),
                                     name='pk')

            tf.summary.scalar('pk', self.pk)

            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction,
                                                   tf.float32),
                                           name="accuracy")

            tf.summary.scalar('accuracy', self.accuracy)

            self.recall, self.update_op_recall = tf.metrics.recall(
                labels=self.targets,
                predictions=self.label_predictions,
                name="recall")

            tf.summary.scalar('recall', self.recall)

            self.precision, self.update_op_precision = tf.metrics.precision(
                labels=self.targets,
                predictions=self.label_predictions,
                name="precision")

            tf.summary.scalar('precision', self.precision)

        # Define Training Tensors
        with tf.name_scope("LSTMRNN_Train"):

            # Momentum optimisation
            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate,
                momentum=self.momentum,
                name="optimizer")

            self.train_step = self.optimizer.minimize(self.cost,
                                                      name="train_step")

            # Initializing the variables
            self.initializer = tf.group(tf.global_variables_initializer(),
                                        tf.local_variables_initializer())
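A hedged training-step sketch for the graph above; the class name `LSTMRNN`, the batch arrays and the hyperparameter values are hypothetical, while the placeholders are the ones defined in `__init__`:

# Hedged sketch: one optimisation step. x_batch: [batch, sequence_length, input_dimension],
# y_batch: [batch, sequence_length, 1]. LSTMRNN is a stand-in name for the class above.
model = LSTMRNN(model_parameters, training_parameters, directories)
with tf.Session() as sess:
    sess.run(model.initializer)
    _, cost = sess.run(
        [model.train_step, model.cost],
        feed_dict={model.inputs: x_batch,
                   model.targets: y_batch,
                   model.learning_rate: 0.01,
                   model.momentum: 0.9,
                   model.is_training: True,
                   model.input_keep_probability: 1.0,     # only consulted if dropout is enabled
                   model.output_keep_probability: 1.0})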
Example #29
 def build_modified_grnn_cell(self, hidden_units):
     cell = ModifiedGrnn(hidden_units)
     return MultiRNNCell([cell])
Example #30
 def build_simple_grnn_cell(self, hidden_units):
     cell = SimpleGrnn(hidden_units)
     return MultiRNNCell([cell])