Example 1
def reshape_state(state, other_shapes):
    with tf.name_scope(sys._getframe().f_code.co_name):
        if isinstance(state, LSTMStateTuple):
            new_c = tf.reshape(state.c, other_shapes + [shape(state.c, -1)])
            new_h = tf.reshape(state.h, other_shapes + [shape(state.h, -1)])
            state = LSTMStateTuple(c=new_c, h=new_h)
        else:
            state = tf.reshape(state, other_shapes + [shape(state, -1)])
        return state
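Nearly every snippet in this collection relies on a repo-local shape(tensor, dim) helper whose definition is not shown. A minimal sketch, assuming it returns the static size of a dimension when known and falls back to the dynamic value otherwise:

def shape(x, dim):
    # Prefer the statically known size of dimension `dim`; fall back to the
    # dynamic value so the result can still be used inside tf.reshape.
    static = x.get_shape().as_list()[dim]
    return static if static is not None else tf.shape(x)[dim]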
Example 2
    def add_byway_attn_state(self, attention_states, attention_lengths):
        with tf.name_scope('add_byway_attn_state'):
            batch_size = shape(attention_states, 0)
            state_size = shape(attention_states, -1)
            byway_state = tf.get_variable('byway_state',
                                          [state_size])  # [state_size]
            byway_state = tf.expand_dims(byway_state, 0)  # [1, state_size]
            byway_state = tf.expand_dims(byway_state, 0)  # [1, 1, state_size]
            byway_state = tf.tile(
                byway_state,
                [batch_size, 1, 1])  # [batch_size, 1, state_size]
            attention_states = tf.concat([byway_state, attention_states],
                                         axis=1)
            attention_lengths += 1
            return attention_states, attention_lengths
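A hypothetical call site (variable names assumed), to show the effect on shapes:

# attention_states: [batch_size, num_states, state_size]
attention_states, attention_lengths = self.add_byway_attn_state(
    attention_states, attention_lengths)
# attention_states: [batch_size, num_states + 1, state_size]; position 0 now
# holds the trainable byway state, and each length is incremented by one.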
Example 3
def setup_birnn(inputs, sequence_length, cell_type, hidden_size, use_residual,
                keep_prob):
    with tf.name_scope(sys._getframe().f_code.co_name):
        batch_size = shape(inputs, 0)
        # CustomLSTMCell's 'initial_state' requires a distinct variable scope
        # for each direction, hence the separate scopes below.
        with tf.variable_scope('fw_cell'):
            cell_fw = setup_cell(cell_type,
                                 hidden_size,
                                 use_residual,
                                 keep_prob=keep_prob)
            initial_state_fw = cell_fw.initial_state(batch_size) if hasattr(
                cell_fw, 'initial_state') else None

        with tf.variable_scope('bw_cell'):
            cell_bw = setup_cell(cell_type,
                                 hidden_size,
                                 use_residual,
                                 keep_prob=keep_prob)
            initial_state_bw = cell_bw.initial_state(batch_size) if hasattr(
                cell_bw, 'initial_state') else None

        outputs, state = rnn.bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            inputs,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            sequence_length=sequence_length,
            dtype=tf.float32)
        return outputs, state
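setup_cell is another repo-local helper. A minimal sketch consistent with the call above, assuming cell_type names a class in tf.nn.rnn_cell (it does not cover the CustomLSTMCell, with its own initial_state, mentioned in the comment):

def setup_cell(cell_type, hidden_size, use_residual, keep_prob=None):
    # Instantiate the requested cell class by name, e.g. 'LSTMCell' or 'GRUCell'.
    cell = getattr(tf.nn.rnn_cell, cell_type)(hidden_size)
    # Apply dropout to the cell outputs.
    if keep_prob is not None:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
    # Optionally wrap the cell with a residual connection.
    if use_residual:
        cell = tf.nn.rnn_cell.ResidualWrapper(cell)
    return cell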
Example 4
    def define_combination(self, all_models):
        '''
        Define the adversarial layers.
        *Note* This function must be executed after all the other models have been defined.
        '''
        adv_models, input_repls, output_label_ids = self.set_label_by_model(
            all_models)
        n_labels = max(output_label_ids) + 1
        # dbgprint(adv_models)
        # dbgprint(input_repls)
        # dbgprint(output_label_ids)

        loss_by_model = []
        gradients_by_model = []
        for model, input_repl, output_id in zip(adv_models, input_repls,
                                                output_label_ids):
            # Ensure that the adversarial layer for a task's outputs is
            # assigned to the same GPU as that task.
            task_idx = all_models.index(model)
            # device = assign_device(task_idx)
            # sys.stderr.write('Defining adversarial layer in %s...\n' % (device))
            # with tf.device(device):
            with tf.name_scope('adversarial'):
                hidden = flip_gradient(input_repl)
                for depth in range(self.config.ffnn_depth - 1):
                    with tf.variable_scope('hidden%d' % (depth + 1)) as scope:
                        hidden = linear(hidden, shape(hidden, -1), scope=scope)
                        hidden = tf.nn.dropout(hidden, self.keep_prob)

                with tf.variable_scope('output') as scope:
                    logits = linear(hidden,
                                    n_labels,
                                    activation=None,
                                    scope=scope)
                #logits = tf.reshape(logits, [-1, n_labels])
                tiled_output_label_id = tf.tile([output_id],
                                                [shape(logits, 0)])
                loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=tiled_output_label_id, logits=logits))
                gradients = self.compute_gradients(self.loss_weight * loss)
                loss_by_model.append(loss)
                gradients_by_model.append(gradients)

        loss = tf.reduce_mean(loss_by_model)
        gradients = average_gradients(gradients_by_model)
        return loss, gradients
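flip_gradient is the gradient-reversal layer from domain-adversarial training: an identity in the forward pass that negates gradients in the backward pass, so the shared representation is trained to confuse the task classifier. The repo's definition is not shown; a common TF1 implementation, given here only as a sketch:

_flip_gradient_calls = 0

def flip_gradient(x, scale=1.0):
    # Forward: identity. Backward: multiply the incoming gradient by -scale.
    global _flip_gradient_calls
    _flip_gradient_calls += 1
    grad_name = 'FlipGradient%d' % _flip_gradient_calls

    @tf.RegisterGradient(grad_name)
    def _flip_gradients(op, grad):
        return [-scale * grad]

    with tf.get_default_graph().gradient_override_map({'Identity': grad_name}):
        return tf.identity(x)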
Example 5
    def setup_decoder_cell(self, config, keep_prob, use_beam_search,
                           init_state, attention_states, attention_lengths):
        batch_size = get_state_shape(init_state)[0]
        if use_beam_search:
            attention_states = tile_batch(attention_states,
                                          multiplier=self.beam_width)
            init_state = nest.map_structure(
                lambda s: tile_batch(s, self.beam_width), init_state)
            attention_lengths = tile_batch(attention_lengths,
                                           multiplier=self.beam_width)
            batch_size = batch_size * self.beam_width

        attention_size = shape(attention_states, -1)
        attention = getattr(tf.contrib.seq2seq, config.attention_type)(
            attention_size,
            attention_states,
            memory_sequence_length=attention_lengths)

        def cell_input_fn(inputs, attention):
            # Cell-input function that keeps the input/output dimensions the same.
            if not config.use_attention_input_feeding:
                return inputs
            attn_project = tf.layers.Dense(config.hidden_size,
                                           dtype=tf.float32,
                                           name='attn_input_feeding',
                                           activation=self.activation)
            return attn_project(tf.concat([inputs, attention], axis=-1))

        cells = _setup_decoder_cell(config, keep_prob)
        if config.top_attention:  # apply attention mechanism only on the top decoder layer
            cells[-1] = AttentionWrapper(
                cells[-1],
                attention_mechanism=attention,
                name="AttentionWrapper",
                attention_layer_size=config.hidden_size,
                alignment_history=use_beam_search,
                initial_cell_state=init_state[-1],
                cell_input_fn=cell_input_fn)
            init_state = [state for state in init_state]
            init_state[-1] = cells[-1].zero_state(batch_size=batch_size,
                                                  dtype=tf.float32)
            init_state = tuple(init_state)
            cells = MultiRNNCell(cells)
        else:
            cells = MultiRNNCell(cells)
            cells = AttentionWrapper(cells,
                                     attention_mechanism=attention,
                                     name="AttentionWrapper",
                                     attention_layer_size=config.hidden_size,
                                     alignment_history=use_beam_search,
                                     initial_cell_state=init_state,
                                     cell_input_fn=cell_input_fn)
            init_state = cells.zero_state(batch_size=batch_size, dtype=tf.float32) \
                              .clone(cell_state=init_state)
        return cells, init_state
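tile_batch (from tf.contrib.seq2seq) repeats each batch entry multiplier times along axis 0, so that every beam hypothesis sees its own copy of the encoder memory. A toy illustration (values assumed):

memory = tf.reshape(tf.range(4, dtype=tf.float32), [2, 2])  # [[0, 1], [2, 3]]
tiled = tile_batch(memory, multiplier=3)
# tiled: [[0, 1], [0, 1], [0, 1], [2, 3], [2, 3], [2, 3]], shape [6, 2]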
Example 6
def get_state_shape(state):
    '''
    Return the size of an encoder state.
    If 'state' is a list of states, return that of the first one.
    '''
    def _get_lstm_state_size(state):
        return [shape(state.h, i) for i in range(len(state.h.get_shape()))]

    if nest.is_sequence(state):
        if isinstance(state[0], LSTMStateTuple):
            return _get_lstm_state_size(state[0])
        else:
            return [
                shape(state[0], i) for i in range(len(state[0].get_shape()))
            ]
    else:
        if isinstance(state, LSTMStateTuple):
            return _get_lstm_state_size(state)
        else:
            return [shape(state, i) for i in range(len(state.get_shape()))]
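A hypothetical usage, assuming static shapes and the shape helper sketched earlier:

state = LSTMStateTuple(c=tf.zeros([8, 128]), h=tf.zeros([8, 128]))
get_state_shape(state)           # [8, 128]
get_state_shape((state, state))  # [8, 128] (size of the first state)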
Example 7
    def char_encode(self, inputs):
        '''
        Args:
        - inputs: [*, max_sequence_length, max_word_length]
        Return:
        - outputs: [*, max_sequence_length, cnn_output_size]
        '''
        if inputs is None:
            return inputs

        with tf.variable_scope(self.scope or "WordEncoder"):
            # Flatten the input so that each word becomes a row of a rank-3 tensor.
            with tf.name_scope('flatten'):
                char_repls = tf.nn.embedding_lookup(
                    self.embeddings.char,
                    inputs)  # [*, max_sequence_len, max_word_len, char_emb_size]
                other_shapes = [
                    shape(char_repls, i)
                    for i in range(len(char_repls.get_shape()[:-2]))
                ]

                flattened_batch_size = reduce(lambda x, y: x * y, other_shapes)
                max_word_len = shape(char_repls, -2)
                char_emb_size = shape(char_repls, -1)

                flattened_char_repls = tf.reshape(
                    char_repls,
                    [flattened_batch_size, max_word_len, char_emb_size])

            cnn_outputs = cnn(
                flattened_char_repls)  # [flattened_batch_size, cnn_output_size]
            outputs = tf.reshape(
                cnn_outputs,
                other_shapes +
                [shape(cnn_outputs, -1)])  # [*, max_sequence_len, cnn_output_size]
            outputs = tf.nn.dropout(outputs, self.keep_prob)
        return outputs
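The cnn helper used above is repo-local and not shown. A minimal character-CNN sketch consistent with the shape comments (filter widths and counts are assumptions):

def cnn(inputs, filter_widths=(3, 4, 5), num_filters=50):
    # inputs: [batch_size, max_word_len, char_emb_size]
    outputs = []
    for i, width in enumerate(filter_widths):
        with tf.variable_scope('conv%d' % i):
            # [batch_size, max_word_len - width + 1, num_filters]
            conv = tf.layers.conv1d(inputs, num_filters, width,
                                    activation=tf.nn.relu)
            # Max-pool over character positions: [batch_size, num_filters].
            outputs.append(tf.reduce_max(conv, axis=1))
    return tf.concat(outputs, axis=-1)  # [batch_size, len(filter_widths) * num_filters]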
Example 8
    def add_bos_and_eos(self, target, start_token, end_token):
        with tf.name_scope('add_BOS_and_EOS'):
            # Prepend start_token to the decoder input and append end_token
            # to the decoder output.
            batch_size = shape(target, 0)
            with tf.name_scope('start_tokens'):
                start_tokens = tf.tile(
                    tf.constant([start_token], dtype=tf.int32), [batch_size])
            with tf.name_scope('end_tokens'):
                end_tokens = tf.tile(tf.constant([end_token], dtype=tf.int32),
                                     [batch_size])
            dec_input_tokens = tf.concat(
                [tf.expand_dims(start_tokens, 1), target], axis=1)
            dec_output_tokens = tf.concat(
                [target, tf.expand_dims(end_tokens, 1)], axis=1)
        return dec_input_tokens, dec_output_tokens
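On a toy batch (token ids assumed):

# With start_token=1, end_token=2 and target = [[5, 6, 7]]:
#   dec_input_tokens  -> [[1, 5, 6, 7]]
#   dec_output_tokens -> [[5, 6, 7, 2]]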
Example 9
def extend_vocab_for_oov(embeddings, inputs, unk_id):
    '''
    Copy the UNK embedding to extend the word embedding matrix by the number
    of unique OOV words (mainly for CopyNet).
    <Args>
    - embeddings: A Tensor ([vocab_size, emb_size]).
    - inputs: An int Tensor of word ids, possibly containing OOV ids beyond vocab_size.
    - unk_id: An integer.
    '''
    with tf.name_scope(sys._getframe().f_code.co_name):
        unk_emb = tf.expand_dims(embeddings[unk_id, :], 0)  # [1, emb_size]
        num_oov_words = tf.maximum(
            tf.reduce_max(inputs) - shape(embeddings, 0), 0)

        oov_embeddings = tf.tile(
            unk_emb, [num_oov_words, 1])  # [num_oov_words, emb_size]
        extended_embeddings = tf.concat([embeddings, oov_embeddings], axis=0)
    return extended_embeddings
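A hypothetical call site (source_ids is an assumed name for ids produced by a copy mechanism, where source-side OOV words get ids at or beyond vocab_size):

extended = extend_vocab_for_oov(self.embeddings, source_ids, unk_id)
inputs_emb = tf.nn.embedding_lookup(extended, source_ids)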
Example 10
    def decode_train(self,
                     dec_input_tokens,
                     dec_lengths,
                     init_state,
                     *attention_args,
                     decoder_class=BasicDecoder,
                     decoder_kwoptions={}):
        '''
        <Args>
        - dec_input_tokens:
        - dec_lengths:
        - init_state:
        - decoder_class:
        - decoder_kwoptions:
        '''
        with tf.variable_scope(self.scope or "Decoder") as scope:
            train_cell, init_state = self.setup_decoder_cell(
                self.config, self.keep_prob, False, init_state,
                *attention_args)

            self.input_project = tf.layers.Dense(units=self.config.hidden_size,
                                                 name="input_projection",
                                                 activation=self.activation)

            if getattr(self.config, 'use_emb_as_out_proj', False):
                # Project the embedding matrix to hidden_size and use its
                # transpose as the kernel of the output projection (weight tying).
                emb_project = tf.layers.Dense(units=self.config.hidden_size,
                                              use_bias=False,
                                              activation=None,
                                              name='emb_projection')
                output_kernel = emb_project(self.embeddings)
                output_kernel = tf.transpose(output_kernel)

                self.output_project = SharedKernelDense(
                    units=shape(self.embeddings, 0),
                    shared_kernel=output_kernel,
                    use_bias=False,
                    activation=None,
                    name='output_projection')
            else:
                self.output_project = tf.layers.Dense(units=shape(
                    self.embeddings, 0),
                                                      name='output_projection',
                                                      use_bias=False,
                                                      activation=None)
            #use_bias=False, trainable=False)
            # self.output_project = tf.layers.Dense(units=shape(self.embeddings, 0),
            #                                       name='output_projection')

            with tf.name_scope('Train'):
                inputs = tf.nn.embedding_lookup(self.embeddings,
                                                dec_input_tokens)
                inputs = self.input_project(inputs)
                inputs = tf.nn.dropout(inputs, self.keep_prob)

                helper = TrainingHelper(inputs,
                                        sequence_length=dec_lengths,
                                        time_major=False)
                train_decoder = decoder_class(train_cell,
                                              helper,
                                              init_state,
                                              output_layer=self.output_project,
                                              **decoder_kwoptions)

                max_dec_len = tf.reduce_max(dec_lengths, name="max_dec_len")
                outputs, final_state, _ = dynamic_decode(
                    train_decoder,
                    impute_finished=True,
                    maximum_iterations=max_dec_len,
                    scope=scope)
                logits = outputs.rnn_output

                # Prevent the training loss from becoming NaN.
                logits += 1e-9
                logits = tf.clip_by_value(logits,
                                          -20.0,
                                          20.0,
                                          name='clip_logits')
                self.train_decoder = train_decoder

        return logits, final_state
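A hypothetical training loss on top of decode_train; the loss computation is not part of the source, and names and shapes are assumed to line up:

logits, final_state = self.decode_train(dec_input_tokens, dec_lengths,
                                        init_state, attention_states,
                                        attention_lengths)
# Mask positions beyond each sequence's length before averaging.
weights = tf.sequence_mask(dec_lengths, dtype=tf.float32)
loss = tf.contrib.seq2seq.sequence_loss(logits, dec_output_tokens, weights)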
Example 11
def _get_lstm_state_size(state):
    return [shape(state.h, i) for i in range(len(state.h.get_shape()))]
Example 12
    def encode(self, inputs, sequence_length):  # , merge_func=tf.reduce_mean):
        config = self.config
        with tf.variable_scope(self.scope or "RNNEncoder") as scope:
            if isinstance(inputs, list):
                inputs = [x for x in inputs if x is not None]
                sent_repls = tf.concat(
                    inputs, axis=-1)  # [*, max_sequence_len, hidden_size]
            else:
                sent_repls = inputs

            # Flatten the input tensor to rank 3 ([*, max_sequence_len, hidden_size])
            # so that inputs of higher rank (e.g. a context given as a list of
            # utterances) can be handled.
            input_hidden_size = shape(sent_repls, -1)
            max_sequence_len = shape(sent_repls, -2)
            other_shapes = [
                shape(sent_repls, i)
                for i in range(len(sent_repls.get_shape()[:-2]))
            ]
            flattened_batch_size = reduce(lambda x, y: x * y, other_shapes)

            flattened_shape = [
                flattened_batch_size, max_sequence_len, input_hidden_size
            ]
            flattened_sent_repls = tf.reshape(sent_repls, flattened_shape)

            flattened_sequence_length = tf.reshape(sequence_length,
                                                   [flattened_batch_size])

            inputs = flattened_sent_repls

            # Project input before the main RNN, to keep the dims of inputs equal to hidden_size in both cases of using birnn or not.
            input_project = tf.layers.Dense(units=self.config.hidden_size,
                                            dtype=tf.float32,
                                            name="input_projection",
                                            activation=self.activation)
            inputs = input_project(inputs)
            inputs = tf.nn.dropout(inputs, self.keep_prob)

            birnn_state = []
            if self.config.num_layers.birnn > 0:
                for i in range(self.config.num_layers.birnn):
                    with tf.variable_scope('BiRNN/L%d' % i):
                        use_residual = self.config.use_residual if i > 0 else False
                        outputs, state = setup_birnn(
                            inputs, flattened_sequence_length,
                            config.cell_type, config.hidden_size,
                            use_residual, self.keep_prob)
                        # Concat and project the outputs and the state from BiRNN to hidden_size.
                        state = merge_state(state, tf.concat)
                        state = project_state(state, self.config.hidden_size)
                        birnn_state.append(state)

                        outputs = tf.concat(outputs, axis=-1)
                        output_project = tf.layers.Dense(
                            units=self.config.hidden_size,
                            dtype=tf.float32,
                            name="output_projection",
                            activation=self.activation)
                        outputs = output_project(outputs)
                        outputs = tf.nn.dropout(outputs, self.keep_prob)
                        inputs = outputs

            rnn_state = []
            if self.config.num_layers.rnn > 0:
                with tf.variable_scope('RNN'):
                    #cells = self.setup_encoder_cell(self.config, self.keep_prob)
                    # outputs, state = rnn.dynamic_rnn(
                    #   cells, inputs,
                    #   sequence_length=flattened_sequence_length, dtype=tf.float32)
                    outputs, rnn_state = setup_rnn(
                        inputs, flattened_sequence_length, config.cell_type,
                        config.hidden_size, config.use_residual,
                        self.keep_prob, config.num_layers.rnn)

            # Restore the original shapes of the outputs and state.
            outputs = tf.reshape(
                outputs, other_shapes +
                [max_sequence_len, shape(outputs, -1)])

            state = list(birnn_state) + list(rnn_state)
            state = tuple([reshape_state(s, other_shapes) for s in state])
        return outputs, state