    def __init__(self,
                 num_mixtures,
                 memory,
                 memory_sequence_length=None,
                 check_inner_dims_defined=True,
                 score_mask_value=None,
                 name='GmmAttention'):
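        """Construct the GMM-based (Graves-style) attention mechanism.

        Args:
            num_mixtures: Number of Gaussian components; the query layer emits
                3 * num_mixtures values, typically the weight, width, and
                location offset of each component in Graves-style GMM attention.
            memory: The memory to query, shaped `[batch_size, max_time, depth]`.
            memory_sequence_length (optional): Sequence lengths for the batch
                entries in `memory`, used to zero out padded rows and mask scores.
            check_inner_dims_defined: Python boolean.  If `True`, the `memory`
                argument's shape is checked to ensure all but the two outermost
                dimensions are fully defined.
            score_mask_value (optional): Value used to mask scores for padded
                positions; defaults to 0.
            name: Name to use when creating ops and scopes.
        """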

        self.dtype = memory.dtype
        self.num_mixtures = num_mixtures
        self.query_layer = tf.layers.Dense(3 * num_mixtures,
                                           name='gmm_query_layer',
                                           use_bias=True,
                                           dtype=self.dtype)

        with tf.name_scope(name, 'GmmAttentionMechanismInit'):
            if score_mask_value is None:
                score_mask_value = 0.
            self._maybe_mask_score = functools.partial(
                _maybe_mask_score,
                memory_sequence_length=memory_sequence_length,
                score_mask_value=score_mask_value)
            self._value = _prepare_memory(memory, memory_sequence_length,
                                          check_inner_dims_defined)
            self._batch_size = (self._value.shape[0].value
                                or tf.shape(self._value)[0])
            self._alignments_size = (self._value.shape[1].value
                                     or tf.shape(self._value)[1])
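# A minimal sketch (not part of the original class) of how the 3 * num_mixtures
# outputs of `gmm_query_layer` are typically turned into alignments in
# Graves-style GMM attention: the three slices parameterize the mixture
# weights, widths, and monotonically advancing locations of K Gaussians that
# are evaluated over the memory time axis.  The function and parameter names
# here are illustrative assumptions, not the class's actual API.
import numpy as np


def gmm_alignments_sketch(query_params, prev_kappa, memory_length):
    """query_params: [batch, 3*K] raw outputs of the query layer;
    prev_kappa: [batch, K] component locations from the previous step."""
    omega_hat, beta_hat, kappa_hat = np.split(query_params, 3, axis=1)
    omega = np.exp(omega_hat)                 # component weights        [batch, K]
    beta = np.exp(beta_hat)                   # component (inverse) widths
    kappa = prev_kappa + np.exp(kappa_hat)    # locations advance monotonically
    positions = np.arange(memory_length)[None, None, :]         # [1, 1, T]
    phi = omega[..., None] * np.exp(
        -beta[..., None] * (kappa[..., None] - positions) ** 2)
    return phi.sum(axis=1), kappa             # alignments [batch, T], new kappa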
    def __init__(self,
                 cell,
                 memory,
                 memory_sequence_length,
                 output_layer,
                 max_oovs,
                 batch_size,
                 memory_full_vocab,
                 first_lv_sim_func,
                 second_lv_sim_func,
                 attention_layer_size=None,
                 alignment_history=False,
                 cell_input_fn=None,
                 output_attention=False,
                 output_generation_distribution=False,
                 output_copy_distribution=False,
                 output_combined_distribution=True,
                 initial_cell_state=None,
                 unk_id=None,
                 name=None):
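        """Construct the hierarchical attention / pointer wrapper.

        Argument descriptions below are inferred from how the constructor uses
        each argument.

        Args:
            cell: An instance of `RNNCell` to wrap.
            memory: Hierarchical memory, shaped `[batch_size, k, max_len, depth]`
                (k memories per batch entry).
            memory_sequence_length: Lengths of each of the k memories, used to
                mask padded positions.
            output_layer: Layer projecting the cell/attention output to
                vocabulary logits (the generation distribution).
            max_oovs: Maximum number of out-of-vocabulary tokens per batch; the
                copy/combined distributions are sized `vocab_size + max_oovs`.
            batch_size: Static batch size.
            memory_full_vocab: Ids of the memory tokens in the extended
                (OOV-augmented) vocabulary.
            first_lv_sim_func: Similarity function for the first (memory-level)
                attention.
            second_lv_sim_func: Similarity function for the second (token-level)
                attention.
            attention_layer_size: Optional size of the attention projection layer.
            alignment_history: Whether to store alignment history in the state.
            cell_input_fn: Optional callable combining inputs and attention;
                defaults to concatenation.
            output_attention: If `True`, emit the attention context.
            output_generation_distribution: If `True`, emit the generation
                (fixed-vocabulary) distribution.
            output_copy_distribution: If `True`, emit the copy distribution over
                the extended vocabulary.
            output_combined_distribution: If `True` (default), emit the combined
                generation + copy distribution.  Exactly one of these four
                output flags may be set.
            initial_cell_state: Optional initial state of the wrapped cell.
            unk_id: Id of the UNK token; required when the copy or combined
                distribution is emitted.
            name: Name scope for the wrapper.
        """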

        super(HierarchicalAttnPointerWrapper, self).__init__(name=name)
        if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
            raise TypeError("cell must be an RNNCell, saw type: %s" %
                            type(cell).__name__)

        self._is_multi = False

        if cell_input_fn is None:
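            # Default (as in tf.contrib.seq2seq.AttentionWrapper): concatenate
            # the previous attention context to the cell input.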
            cell_input_fn = (lambda inputs, attention: array_ops.concat(
                [inputs, attention], -1))
        else:
            if not callable(cell_input_fn):
                raise TypeError(
                    "cell_input_fn must be callable, saw type: %s" %
                    type(cell_input_fn).__name__)

        if attention_layer_size is not None:
            attention_layer_sizes = tuple(attention_layer_size if isinstance(
                attention_layer_size, (list,
                                       tuple)) else (attention_layer_size, ))
            if len(attention_layer_sizes) != 1:
                raise ValueError(
                    "If provided, attention_layer_size must contain exactly one "
                    "integer per attention_mechanism, saw: %d vs 1" %
                    (len(attention_layer_sizes)))

            self._attention_layers = tuple(
                layers_core.Dense(attention_layer_size,
                                  name="attention_layer",
                                  use_bias=False)
                for attention_layer_size in attention_layer_sizes)
            self._attention_layer_size = sum(attention_layer_sizes)
        else:
            self._attention_layers = None
            self._attention_layer_size = memory.get_shape()[-1].value

        self._cell = cell
        self._cell_input_fn = cell_input_fn
        self._output_attention = output_attention
        self._output_generation_distribution = output_generation_distribution
        self._output_copy_distribution = output_copy_distribution
        self._output_combined_distribution = output_combined_distribution
        self._unk_id = unk_id
        self._alignment_history = alignment_history
        self._output_layer = output_layer
        self._max_oovs = max_oovs
        self._batch_size = batch_size

        # Static memory shape: [batch (b), num_memories (k), max_len, hidden (h)].
        self._b, self._k, _, h = memory.get_shape().as_list()

        b = self._b
        k = self._k

        mem_reshaped = tf.reshape(memory, [b * k, -1, h])
        mem_mask_reshaped = tf.reshape(memory_sequence_length, [-1])

        self._memory = tf.reshape(
            _prepare_memory(mem_reshaped, mem_mask_reshaped, False),
            [b, k, -1, h])
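        # The reshapes above flatten the k memories per batch entry into a
        # single [b * k, max_len, h] batch so that _prepare_memory can apply
        # the per-memory length mask, then restore the [b, k, max_len, h] layout.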
        self._memory_full_vocab = memory_full_vocab

        self._attention_mechanisms = [None]  # placeholder

        with tf.variable_scope("first_lv_attn"):
            self._first_lv_sim_func = first_lv_sim_func

        with tf.variable_scope("second_lv_attn"):
            self._second_lv_sim_func = second_lv_sim_func

        if self._output_combined_distribution or \
                self._output_generation_distribution or \
                self._output_copy_distribution or \
                self._output_attention:
            # An XOR chain only checks parity (three True flags would pass), so
            # count the enabled flags explicitly to enforce "exactly one".
            assert sum([self._output_combined_distribution,
                        self._output_generation_distribution,
                        self._output_copy_distribution,
                        self._output_attention]) == 1, "Can only output one type!"

        if self._output_combined_distribution or self._output_copy_distribution:
            assert self._unk_id is not None

        with ops.name_scope(name, "AttnPointerWrapperInit"):
            if initial_cell_state is None:
                self._initial_cell_state = None
            else:
                final_state_tensor = nest.flatten(initial_cell_state)[-1]
                state_batch_size = (final_state_tensor.shape[0].value
                                    or array_ops.shape(final_state_tensor)[0])
                error_message = (
                    "When constructing AttnPointerWrapper %s: " %
                    self._base_name +
                    "Non-matching batch sizes between the memory "
                    "(encoder output) and initial_cell_state.  Are you using "
                    "the BeamSearchDecoder?  You may need to tile your initial state "
                    "via the tf.contrib.seq2seq.tile_batch function with argument "
                    "multiple=beam_width.")
                with ops.control_dependencies(
                        self._batch_size_checks(state_batch_size,
                                                error_message)):
                    self._initial_cell_state = nest.map_structure(
                        lambda s: array_ops.identity(
                            s, name="check_initial_cell_state"),
                        initial_cell_state)
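# A minimal sketch (assumption; the wrapper's step logic is not shown here) of
# how the pieces configured above are commonly combined into one distribution
# over an extended vocabulary, as in pointer-generator networks: the
# generation distribution covers the fixed vocabulary, the copy attention is
# scattered onto the memory tokens' extended-vocabulary ids (OOVs occupy the
# extra max_oovs slots), and `p_gen` is a hypothetical gating probability.
import numpy as np


def combined_distribution_sketch(p_gen, gen_dist, copy_attn, memory_ids,
                                 vocab_size, max_oovs):
    """p_gen: [batch, 1]; gen_dist: [batch, vocab_size];
    copy_attn: [batch, src_len]; memory_ids: [batch, src_len] (int ids)."""
    batch = gen_dist.shape[0]
    extended = np.zeros((batch, vocab_size + max_oovs))
    extended[:, :vocab_size] = p_gen * gen_dist               # generation part
    for b in range(batch):                                    # scatter copy part
        np.add.at(extended[b], memory_ids[b], (1.0 - p_gen[b]) * copy_attn[b])
    return extended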
Example #3
    def __init__(self,
                 attention_v,
                 dec_query_layer,
                 enc_query_layer,
                 memory_layer,
                 memory,
                 memory_sequence_length=None,
                 probability_fn=None,
                 score_mask_value=None,
                 check_inner_dims_defined=True):
        """Construct the word level attention mechanism.
        Args:
            attention_v: The attention v variable.
            dec_query_layer: Mapping layer for decoder's query.
            enc_query_layer: Mapping layer for utterance-level encoder's query.
            memory_layer: Mapping layer for memory.
            memory: The memory to query; the output of a bidirectional RNN.  This
                tensor should be shaped `[batch_size, max_uttr_len, 2*n_hidden_units]`.
            memory_sequence_length (optional): Sequence lengths for the batch entries
                in memory.  If provided, the memory tensor rows are masked with zeros
                for values past the respective sequence lengths.
            probability_fn: (optional) A `callable`.  Converts the score to
                probabilities.  The default is `tf.nn.softmax`. Other options include
                `tf.contrib.seq2seq.hardmax` and `tf.contrib.sparsemax.sparsemax`.
                Its signature should be: `probabilities = probability_fn(score)`.
            score_mask_value (optional): The mask value for score before passing into
                `probability_fn`. The default is -inf. Only used if
                `memory_sequence_length` is not None.
            check_inner_dims_defined: Python boolean.  If `True`, the `memory`
                argument's shape is checked to ensure all but the two outermost
                dimensions are fully defined.
        """
        # super(WordLevelAttentionMechanism, self).__init__(
        #     query_layer = None,
        #     memory_layer = memory_layer,
        #     memory = memory,
        #     probability_fn = wrapped_probability_fn,
        #     memory_sequence_length = memory_sequence_length)

        # Custom initialization (instead of the base class's, commented out above):
        # memory_sequence_length may contain zero-length entries, which the standard
        # _maybe_mask_score check does not allow, so the _no_check variant is used.
        if probability_fn is None:
            probability_fn = tf.nn.softmax
        if (memory_layer is not None
                and not isinstance(memory_layer, tf.layers.Layer)):
            raise TypeError("memory_layer is not a Layer: %s" %
                            type(memory_layer).__name__)
        self._query_layer = None
        self._memory_layer = memory_layer
        self.dtype = memory_layer.dtype
        if not callable(probability_fn):
            raise TypeError("probability_fn must be callable, saw type: %s" %
                            type(probability_fn).__name__)
        if score_mask_value is None:
            score_mask_value = tf.dtypes.as_dtype(
                self._memory_layer.dtype).as_numpy_dtype(-np.inf)
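        # The wrapped probability_fn below masks padded scores with
        # score_mask_value (skipping the positive-length check), applies
        # probability_fn, and then re-masks the resulting probabilities,
        # presumably so that all-padding (zero-length) memories yield
        # all-zero alignments rather than an error.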
        self._probability_fn = lambda score: _maybe_mask_score_softmax(
            probability_fn(
                _maybe_mask_score_no_check(score, memory_sequence_length,
                                           score_mask_value)),
            memory_sequence_length)
        with tf.name_scope(None, "BaseAttentionMechanismInit",
                           tf.contrib.framework.nest.flatten(memory)):
            self._values = _prepare_memory(
                memory,
                memory_sequence_length,
                check_inner_dims_defined=check_inner_dims_defined)
            self._keys = (self.memory_layer(self._values)
                          if self.memory_layer else self._values)
            self._batch_size = (self._keys.shape[0].value
                                or tf.shape(self._keys)[0])
            self._alignments_size = (self._keys.shape[1].value
                                     or tf.shape(self._keys)[1])

        # Extra initialization
        self._dec_query_layer = dec_query_layer
        self._enc_query_layer = enc_query_layer
        self._attention_v = attention_v
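# A minimal sketch (assumption; the mechanism's __call__ is not shown) of the
# additive, Bahdanau-style score suggested by the pieces stored above: the
# decoder query and the utterance-level encoder query are each projected
# (standing in for dec_query_layer and enc_query_layer), added to the
# projected memory keys, and scored with attention_v.
import numpy as np


def word_level_score_sketch(dec_query, enc_query, keys, attention_v,
                            w_dec, w_enc):
    """dec_query: [batch, dq]; enc_query: [batch, de];
    keys: [batch, T, units] (memory already passed through memory_layer);
    attention_v: [units]; w_dec: [dq, units]; w_enc: [de, units]."""
    processed_dec = dec_query @ w_dec                         # [batch, units]
    processed_enc = enc_query @ w_enc                         # [batch, units]
    hidden = np.tanh(keys + processed_dec[:, None, :] + processed_enc[:, None, :])
    return np.tensordot(hidden, attention_v, axes=([2], [0]))  # scores [batch, T]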