예제 #1
0
    def call(self, inputs, state):  # pylint: disable=arguments-differ
        """ Perform a step of the self-attention-wrapped RNN
            :param inputs: Rank-2 Tensor, the input at this time step (its first dimension is used as the batch size).
            :param state: An instance of `SelfAttentionWrapperState` containing tensors from the previous time step.
            :return: A tuple `(attention_or_cell_output, next_state)`, where:
                    - `attention_or_cell_output` is the attention if `output_attention` is set, else the cell output.
                    - `next_state` is an instance of `SelfAttentionWrapperState` containing the state calculated at
                       this time step (updated cell state, time + 1, and the memory for this step).
            :raises TypeError: If `state` is not an instance of `SelfAttentionWrapperState`.
        """
        if not isinstance(state, SelfAttentionWrapperState):
            # The message names the class that is actually checked above.
            raise TypeError(
                'Expected state to be instance of SelfAttentionWrapperState. Received type %s instead.'
                % type(state))

        # Getting batch size
        batch_size = array_ops.shape(inputs)[0]
        assert len(inputs.shape) == 2, 'Expected inputs to be of rank 2'

        def get_next_memory_and_attn():
            """ Time != 0: appends the transformed inputs to the memory, then attends over the extended memory """
            next_memory = array_ops.concat(
                [
                    state.memory,  # [b, t, mem_size]
                    array_ops.expand_dims(self._input_fn(inputs), axis=1)
                ],
                axis=1)
            next_attention = self._compute_attention(inputs, next_memory)

            # Prevents this branch from executing eagerly
            with ops.control_dependencies([next_memory, next_attention]):
                return array_ops.identity(next_memory), array_ops.identity(
                    next_attention)

        def get_zero_memory_and_attn():
            """ Time = 0, we don't concatenate to memory and attention is all 0. """
            next_memory = state.memory
            next_attention = array_ops.zeros(
                [batch_size, self._attention_layer_size], dtype=inputs.dtype)

            # Prevents this branch from executing eagerly
            with ops.control_dependencies([next_memory, next_attention]):
                return array_ops.identity(next_memory), array_ops.identity(
                    next_attention)

        # Computing memory and attention (the first step has nothing to attend over yet)
        memory, attention = control_flow_ops.cond(
            gen_math_ops.equal(state.time, 0),
            true_fn=get_zero_memory_and_attn,
            false_fn=get_next_memory_and_attn)

        # Calculate the true inputs to the cell based on the attention value computed above.
        cell_inputs = self._cell_input_fn(inputs, attention)
        cell_state = state.cell_state
        cell_output, cell_state = self._cell(cell_inputs, cell_state)

        # Building the state for the next time step
        next_state = SelfAttentionWrapperState(cell_state=cell_state,
                                               time=state.time + 1,
                                               memory=memory)

        # Returning cell output or attention
        if self._output_attention:
            return attention, next_state
        return cell_output, next_state
예제 #2
0
    def next_inputs(self, time, outputs, state, sample_ids, name=None):
        """ Builds the inputs (embedding and mask) to feed at the following time step
            :param time: The current time step.
            :param outputs: The outputs of the current step (only passed to the name scope).
            :param state: The state of the current step (returned unchanged).
            :param sample_ids: The ids sampled at the current step (fed back by the greedy and sample decoders).
            :param name: Optional name for the name scope.
            :return: A tuple `(finished, next_inputs, state)`
        """
        scope_values = [time, outputs, state, sample_ids]
        with ops.name_scope(name, 'CustomHelperNextInputs', scope_values):
            step = time + 1
            finished = step >= self._sequence_length
            batch_done = math_ops.reduce_all(finished)

            def _read_stored_ids():
                """ Training decoder (and default): reads `self._input_tas` at the next step """
                stored_ids = self._input_tas.read(step)
                with ops.control_dependencies([stored_ids]):
                    return array_ops.identity(stored_ids)

            def _reuse_sample_ids():
                """ Greedy / sample decoder: feeds back the ids that were just sampled """
                return sample_ids

            def _masked_inputs():
                """ Builds the `MaskedInputs` of the next step """
                is_training = gen_math_ops.equal(self._decoder_type, TRAINING_DECODER)
                is_greedy = gen_math_ops.equal(self._decoder_type, GREEDY_DECODER)
                is_sample = gen_math_ops.equal(self._decoder_type, SAMPLE_DECODER)
                step_ids = control_flow_ops.case([(is_training, _read_stored_ids),
                                                  (is_greedy, _reuse_sample_ids),
                                                  (is_sample, _reuse_sample_ids)],
                                                 default=_read_stored_ids)
                step_emb = self._input_layer(self._embedding_fn(step_ids))

                # Applying mask
                # ids_one_hot:   (b, 1, VOC, 1)
                # step_mask:     (b, 1, VOC, VOC)
                # allowed:       (b, VOC)        -- DenseTensor
                ids_one_hot = array_ops.one_hot(step_ids, self.vocab_size)
                ids_one_hot = ids_one_hot[:, None, :, None]
                step_mask = _slice_mask(self._mask, [-1, step, -1, -1], time_major=self._time_major)
                allowed = sparse_ops.sparse_reduce_sum(ids_one_hot * step_mask, axis=[1, 2])
                allowed = gen_math_ops.minimum(allowed, 1.)
                allowed.set_shape([None, self.vocab_size])

                # Prevents this branch from executing eagerly
                with ops.control_dependencies([step_emb, allowed]):
                    return MaskedInputs(inputs=array_ops.identity(step_emb),
                                        mask=array_ops.identity(allowed))

            # Zero inputs once every sequence in the batch is done
            upcoming = control_flow_ops.cond(batch_done,
                                             true_fn=lambda: self._zero_inputs,
                                             false_fn=_masked_inputs)
            return finished, upcoming, state
예제 #3
0
    def initialize(self, name=None):
        """ Computes the finished flags and the inputs of the first time step
            :param name: Optional name for the name scope.
            :return: A tuple `(finished, next_inputs)`
        """
        with ops.name_scope(name, 'CustomHelperInitialize'):
            finished = gen_math_ops.equal(0, self._sequence_length)
            batch_done = math_ops.reduce_all(finished)
            first_candidates = self._candidate_tas.read(0)

            def _embed_first_stored_input():
                """ Training decoder (and default): embeds the first entry of `self._input_tas` """
                embedded = self._order_embedding_fn(self._input_tas.read(0))
                with ops.control_dependencies([embedded]):
                    return array_ops.identity(embedded)

            def _embed_start_inputs():
                """ Greedy / sample decoder: embeds `self._start_inputs` """
                embedded = self._order_embedding_fn(self._start_inputs)
                with ops.control_dependencies([embedded]):
                    return array_ops.identity(embedded)

            # Selecting the first embedding according to the decoder type
            is_training = gen_math_ops.equal(self._decoder_type, TRAINING_DECODER)
            is_greedy = gen_math_ops.equal(self._decoder_type, GREEDY_DECODER)
            is_sample = gen_math_ops.equal(self._decoder_type, SAMPLE_DECODER)
            first_emb = control_flow_ops.case([(is_training, _embed_first_stored_input),
                                               (is_greedy, _embed_start_inputs),
                                               (is_sample, _embed_start_inputs)],
                                              default=_embed_first_stored_input)

            def _first_step_inputs():
                """ Builds the `CandidateInputs` of the first step """
                projected = self._input_layer(first_emb)
                first_candidates_emb = self._candidate_embedding_fn(first_candidates)
                return CandidateInputs(inputs=projected,
                                       candidates=first_candidates,
                                       candidates_emb=first_candidates_emb)

            # Zero inputs when every sequence has a length of 0
            upcoming = control_flow_ops.cond(batch_done,
                                             lambda: self._zero_inputs,
                                             _first_step_inputs)
            return finished, upcoming
예제 #4
0
    def _take_sparse_grad(grad_accum, grad):
        """ Takes the gradient out of a SparseConditionalAccumulator, or zeros if it holds nothing
            :param grad_accum: The gradient accumulator where gradients are stored
            :param grad: An instance of the gradient stored in the accumulator (template for the zero gradient)
            :return: The avg gradient to apply (or a zero-like object if no gradients are stored)
            :type grad_accum: data_flow_ops.SparseConditionalAccumulator
        """
        def _accumulated_slices():
            """ Takes the indexed slices stored in the accumulator """
            taken = grad_accum.take_indexed_slices_grad(num_required=1)
            with ops.control_dependencies([taken]):
                guarded_values = array_ops.identity(taken.values)
                return ops.IndexedSlices(values=guarded_values,
                                         indices=taken.indices,
                                         dense_shape=taken.dense_shape)

        def _zero_slices():
            """ Builds indexed slices with the indices / shape of `grad` and zeroed-out values """
            blank_values = array_ops.zeros_like(grad.values)
            with ops.control_dependencies([blank_values]):
                guarded_values = array_ops.identity(blank_values)
                indices_64 = math_ops.cast(grad.indices, dtypes.int64)
                shape_64 = math_ops.cast(grad.dense_shape, dtypes.int64)
                return ops.IndexedSlices(values=guarded_values,
                                         indices=indices_64,
                                         dense_shape=shape_64)

        # Nothing accumulated -> zero gradient, otherwise the accumulated gradient
        accumulator_is_empty = gen_math_ops.equal(grad_accum.num_accumulated(), 0)
        return control_flow_ops.cond(accumulator_is_empty,
                                     true_fn=_zero_slices,
                                     false_fn=_accumulated_slices)
예제 #5
0
    def next_inputs(self, time, inputs, beam_search_output, beam_search_state):
        """ Builds the inputs (embedding and mask) of the next time step from the beam outputs
            :param time: The current time step (scalar)
            :param inputs: A (structure of) input tensors (not read by this method).
            :param beam_search_output: The output of the beam search step
            :param beam_search_state: The state after the beam search step (not read by this method).
            :return: `(beam_search_output, next_inputs)`
            :type beam_search_output: beam_search_decoder.BeamSearchDecoderOutput
            :type beam_search_state: beam_search_decoder.BeamSearchDecoderState
        """
        step = time + 1
        batch_done = math_ops.reduce_all(step >= self._sequence_length)
        predicted_ids = beam_search_output.predicted_ids

        def _masked_inputs():
            """ Builds the `MaskedInputs` of the next step from the predicted ids """
            step_emb = self._input_layer(self._embedding_fn(predicted_ids))  # [bat, beam, in_sz]

            # Applying mask
            # ids_one_hot:   (batch, beam,   1, VOC,   1)
            # step_mask:     (batch,    1,   1, VOC, VOC)
            # allowed:       (batch, beam, VOC)
            ids_one_hot = array_ops.one_hot(predicted_ids, self.vocab_size)
            ids_one_hot = ids_one_hot[:, :, None, :, None]
            sparse_step_mask = _slice_mask(self._mask, [-1, step, -1, -1], time_major=self._time_major)
            step_mask = sparse_ops.sparse_tensor_to_dense(sparse_step_mask)
            step_mask = step_mask[:, None, :, :, :]
            step_mask.set_shape([None, 1, 1, self.vocab_size, self.vocab_size])
            allowed = math_ops.reduce_sum(ids_one_hot * step_mask, axis=[2, 3])
            allowed = gen_math_ops.minimum(allowed, 1.)

            # Prevents this branch from executing eagerly
            with ops.control_dependencies([step_emb, allowed]):
                return MaskedInputs(inputs=array_ops.identity(step_emb),
                                    mask=array_ops.identity(allowed))

        # Zero inputs once every sequence in the batch is done
        upcoming = control_flow_ops.cond(batch_done,
                                         true_fn=lambda: self._zero_inputs,
                                         false_fn=_masked_inputs)
        return beam_search_output, upcoming
예제 #6
0
    def call(self, inputs, state):  # pylint: disable=arguments-differ
        """ Runs one step of the wrapped cell and computes the attention from the stored alignments
            :param inputs: (Possibly nested tuple of) Tensor, the input at this time step.
            :param state: An instance of `AttentionWrapperState` containing tensors from the previous time step.
            :return: A tuple `(attention_or_cell_output, next_state)`, where:
                    - `attention_or_cell_output` is the attention if `output_attention` is set, else the cell output.
                    - `next_state` is an instance of `AttentionWrapperState` containing the state calculated at
                       this time step.
            :raises TypeError: If `state` is not an instance of `AttentionWrapperState`.
        """
        if not isinstance(state, AttentionWrapperState):
            message = 'Expected state to be instance of AttentionWrapperState. Received type %s instead.'
            raise TypeError(message % type(state))

        step = state.time + 1
        finished = step >= self._sequence_length
        batch_done = math_ops.reduce_all(finished)

        def _zero_alignments():
            """ Every sequence is done: uses the zero alignment """
            return self._zero_alignment

        def _stored_alignments():
            """ Reads the alignments stored for the next time step """
            stored = self._alignments_ta.read(step)
            with ops.control_dependencies([stored]):
                return array_ops.identity(stored)

        # The cell is fed with the inputs combined with the attention of the previous step
        cell_output, new_cell_state = self._cell(self._cell_input_fn(inputs, state.attention),
                                                 state.cell_state)

        # Attention for the next step, from the alignments selected above
        alignments = control_flow_ops.cond(batch_done,
                                           true_fn=_zero_alignments,
                                           false_fn=_stored_alignments)
        attention, _ = self._compute_attention(alignments, self._memory)

        next_state = AttentionWrapperState(time=step,
                                           cell_state=new_cell_state,
                                           attention=attention,
                                           alignments=alignments,
                                           attention_state=alignments,
                                           alignment_history=())

        step_output = attention if self._output_attention else cell_output
        return step_output, next_state
예제 #7
0
    def next_inputs(self, time, inputs, beam_search_output, beam_search_state):
        """ Builds the candidate inputs of the next time step and rewrites the beam output ids
            :param time: The current time step (scalar)
            :param inputs: A (structure of) input tensors; its `candidates` field is read.
            :param beam_search_output: The output of the beam search step
            :param beam_search_state: The state after the beam search step (not read by this method).
            :return: `(beam_search_output, next_inputs)`, where the returned output has its `predicted_ids`
                     replaced by the ids looked up in the candidates.
            :type beam_search_output: beam_search_decoder.BeamSearchDecoderOutput
            :type beam_search_state: beam_search_decoder.BeamSearchDecoderState
        """
        step = time + 1
        batch_done = math_ops.reduce_all(step >= self._sequence_length)

        # The predicted ids index into the candidates; a one-hot product picks the candidate at each index
        candidate_positions = beam_search_output.predicted_ids
        step_candidates = inputs.candidates
        candidate_count = array_ops.shape(step_candidates)[1]
        position_one_hot = array_ops.one_hot(candidate_positions, candidate_count, dtype=dtypes.int32)
        picked_ids = math_ops.reduce_sum(position_one_hot * array_ops.expand_dims(step_candidates, axis=1),
                                         axis=-1)

        def _candidate_inputs():
            """ Builds the `CandidateInputs` of the next step """
            picked_emb = self._input_layer(self._order_embedding_fn(picked_ids))
            upcoming_candidates = self._candidate_tas.read(step)
            upcoming_candidates_emb = self._candidate_embedding_fn(upcoming_candidates)

            # Prevents this branch from executing eagerly
            with ops.control_dependencies([picked_emb, upcoming_candidates, upcoming_candidates_emb]):
                return CandidateInputs(inputs=array_ops.identity(picked_emb),
                                       candidates=array_ops.identity(upcoming_candidates),
                                       candidates_emb=array_ops.identity(upcoming_candidates_emb))

        # Zero inputs once every sequence in the batch is done
        upcoming = control_flow_ops.cond(batch_done,
                                         true_fn=lambda: self._zero_inputs,
                                         false_fn=_candidate_inputs)

        # Same scores and parents, with the ids picked from the candidates
        rewritten_output = beam_search_decoder.BeamSearchDecoderOutput(scores=beam_search_output.scores,
                                                                       predicted_ids=picked_ids,
                                                                       parent_ids=beam_search_output.parent_ids)
        return rewritten_output, upcoming
예제 #8
0
    def initialize(self):
        """ Initialize the beam helper - Called in beam_decoder.initialize()
            The start inputs are the zero inputs when every sequence has a length of 0, otherwise the start
            tokens embedded, passed through the input layer, and split per beam with a zero mask.
            :return: `(finished, start_inputs, initial_cell_state)`.
        """
        finished = self._finished
        zero_inputs = self._zero_inputs
        zero_mask = self._zero_mask
        nothing_to_decode = math_ops.reduce_all(gen_math_ops.equal(0, self._sequence_length))
        start_emb = self._embedding_fn(self._start_tokens)

        def _first_step_inputs():
            """ Builds the `MaskedInputs` of the first step """
            projected = self._input_layer(start_emb)
            per_beam = self._split_batch_beams(projected, self._input_size)
            return MaskedInputs(inputs=per_beam, mask=zero_mask)

        start_inputs = control_flow_ops.cond(nothing_to_decode,
                                             lambda: zero_inputs,
                                             _first_step_inputs)
        return finished, start_inputs, self._initial_cell_state
예제 #9
0
    def initialize(self):
        """ Initialize the beam helper - Called in beam_decoder.initialize()
            The start inputs are the zero inputs when every sequence has a length of 0, otherwise the start
            tokens embedded, passed through the input layer, and split per beam, with the first candidates.
            :return: `(finished, start_inputs, initial_cell_state)`.
        """
        finished = self._finished
        zero_inputs = self._zero_inputs
        nothing_to_decode = math_ops.reduce_all(gen_math_ops.equal(0, self._sequence_length))
        start_emb = self._order_embedding_fn(self._start_tokens)
        first_candidates = self._candidate_tas.read(0)

        def _first_step_inputs():
            """ Builds the `CandidateInputs` of the first step """
            projected = self._input_layer(start_emb)
            per_beam = self._split_batch_beams(projected, self._input_size)
            first_candidates_emb = self._candidate_embedding_fn(first_candidates)
            return CandidateInputs(inputs=per_beam,
                                   candidates=first_candidates,
                                   candidates_emb=first_candidates_emb)

        start_inputs = control_flow_ops.cond(nothing_to_decode,
                                             lambda: zero_inputs,
                                             _first_step_inputs)
        return finished, start_inputs, self._initial_cell_state
예제 #10
0
    def next_inputs(self, time, outputs, state, sample_ids, name=None):
        """ Builds the inputs (embedding and candidates) to feed at the following time step
            :param time: The current time step.
            :param outputs: The outputs of the current step (only passed to the name scope).
            :param state: The state of the current step (returned unchanged).
            :param sample_ids: The ids sampled at the current step (fed back by the greedy and sample decoders).
            :param name: Optional name for the name scope.
            :return: A tuple `(finished, next_inputs, state)`
        """
        scope_values = [time, outputs, state, sample_ids]
        with ops.name_scope(name, 'CustomHelperNextInputs', scope_values):
            step = time + 1
            finished = step >= self._sequence_length
            batch_done = math_ops.reduce_all(finished)

            def _read_stored_ids():
                """ Training decoder (and default): reads `self._input_tas` at the next step """
                stored_ids = self._input_tas.read(step)
                with ops.control_dependencies([stored_ids]):
                    return array_ops.identity(stored_ids)

            def _reuse_sample_ids():
                """ Greedy / sample decoder: feeds back the ids that were just sampled """
                return sample_ids

            def _candidate_inputs():
                """ Builds the `CandidateInputs` of the next step """
                is_training = gen_math_ops.equal(self._decoder_type, TRAINING_DECODER)
                is_greedy = gen_math_ops.equal(self._decoder_type, GREEDY_DECODER)
                is_sample = gen_math_ops.equal(self._decoder_type, SAMPLE_DECODER)
                step_ids = control_flow_ops.case([(is_training, _read_stored_ids),
                                                  (is_greedy, _reuse_sample_ids),
                                                  (is_sample, _reuse_sample_ids)],
                                                 default=_read_stored_ids)
                step_emb = self._input_layer(self._order_embedding_fn(step_ids))
                upcoming_candidates = self._candidate_tas.read(step)
                upcoming_candidates_emb = self._candidate_embedding_fn(upcoming_candidates)

                # Prevents this branch from executing eagerly
                with ops.control_dependencies([step_emb, upcoming_candidates, upcoming_candidates_emb]):
                    return CandidateInputs(inputs=array_ops.identity(step_emb),
                                           candidates=array_ops.identity(upcoming_candidates),
                                           candidates_emb=array_ops.identity(upcoming_candidates_emb))

            # Zero inputs once every sequence in the batch is done
            upcoming = control_flow_ops.cond(batch_done,
                                             true_fn=lambda: self._zero_inputs,
                                             false_fn=_candidate_inputs)
            return finished, upcoming, state
예제 #11
0
    def _take_dense_grad(grad_accum, grad):
        """ Takes the gradient out of a ConditionalAccumulator, or zeros if it holds nothing
            :param grad_accum: The gradient accumulator where gradients are stored
            :param grad: An instance of the gradient stored in the accumulator (template for the zero gradient)
            :return: The avg gradient to apply (or a zero-like object if no gradients are stored)
            :type grad_accum: data_flow_ops.ConditionalAccumulator
        """
        def _accumulated_grad():
            """ Takes the gradient stored in the accumulator """
            taken = grad_accum.take_grad(num_required=1)
            with ops.control_dependencies([taken]):
                return array_ops.identity(taken)

        def _blank_grad():
            """ Builds a zero tensor shaped like `grad` """
            blank = array_ops.zeros_like(grad)
            with ops.control_dependencies([blank]):
                return array_ops.identity(blank)

        # Nothing accumulated -> zero gradient, otherwise the accumulated gradient
        accumulator_is_empty = gen_math_ops.equal(grad_accum.num_accumulated(), 0)
        return control_flow_ops.cond(accumulator_is_empty,
                                     true_fn=_blank_grad,
                                     false_fn=_accumulated_grad)
예제 #12
0
def seeded_dropout(inputs,
                   seeds,
                   keep_probs,
                   offset=None,
                   noise_shape=None,
                   seed=None,
                   name=None):
    """ Computes dropout (with a deterministic mask).
        Every item in the batch has a deterministic seed to compute the deterministic mask

        With probability `keep_probs`, outputs the input element scaled up by `1 / keep_probs`, otherwise outputs `0`.
        The scaling is so that the expected sum is unchanged.

        By default, each element is kept or dropped independently. If `noise_shape` is specified, it must be
        broadcastable to the shape of `inputs`, and only dimensions with `noise_shape[i] == shape(inputs)[i]` will
        make independent decisions.

        For example, if `shape(inputs) = [k, l, m, n]` and `noise_shape = [k, 1, 1, n]`, each batch and channel
        component will be kept independently and each row and column will be kept or not kept together.

        When `offset` is None, every call advances the function-level counter `seeded_dropout.offset` (including
        calls that return early), and that counter is used as the offset when the mask is computed.

        :param inputs: A floating point tensor. Integer tensors are returned unchanged.
        :param seeds: A tensor representing the seed for each item in the batch. (Size: (batch,))
        :param keep_probs: A scalar or vector of size (batch,). The probability that each element is kept.
                           Tensor values are clipped to [0, 1]; a Python float must be in (0, 1].
        :param offset: Integer. Alternative offset to apply to compute the deterministic mask (e.g. in a loop).
        :param noise_shape: A 1-D `Tensor` of type `int32`, represents the shape for randomly generated keep/drop flags.
        :param seed: A Python integer. Used to create a default seed for the operation.
        :param name: A name for this operation (optional).
        :return: A Tensor of the same shape as `inputs`.
        :raises ValueError: If `inputs` is neither integer nor floating point, if a float `keep_probs` is outside
                            (0, 1], or if called while executing eagerly.
    """
    # No explicit offset: advance the shared counter so each call gets a different offset.
    # This runs before every early return below.
    if offset is None:
        seeded_dropout.offset += 40555607

    # Inputs that must not be masked are returned untouched:
    # - scalars (likely the 'time' attribute in a state)
    # - integer tensors (we can safely ignore them)
    # NOTE(review): the scalar case relies on `inputs.shape` being falsy for a scalar; confirm this holds for the
    # shape type in use (it may only be falsy when the rank is unknown).
    if not inputs.shape or inputs.dtype.is_integer:
        return inputs

    with ops.name_scope(name, 'seeded_dropout', [inputs]):
        inputs = ops.convert_to_tensor(inputs, name='x')
        if not inputs.dtype.is_floating:
            raise ValueError(
                'Expected a floating point tensor. Got a %s tensor instead.' %
                inputs.dtype)
        # Only Python floats can be range-checked here; tensor values are clipped further down instead.
        if isinstance(keep_probs, float) and not 0 < keep_probs <= 1:
            raise ValueError(
                'keep_probs must be a scalar tensor or a float in the range (0, 1], got %g'
                % keep_probs)

        # Early return if nothing needs to be dropped.
        if isinstance(keep_probs, float) and keep_probs == 1:
            return inputs

        # Not supported in eager mode
        if context.executing_eagerly():
            raise ValueError('This function is not supported in eager mode.')

        # Converting to tensor, clipping to [0, 1], and reshaping to [-1, 1, ..., 1] (same rank as inputs)
        # so that one probability per batch item broadcasts against the inputs.
        # NOTE(review): a keep_probs value of 0 survives the clipping and is later used as a divisor in
        # get_dropout_mask - confirm callers never pass 0.
        keep_probs = ops.convert_to_tensor(keep_probs,
                                           dtype=inputs.dtype,
                                           name='keep_probs')
        keep_probs = gen_math_ops.maximum(0.,
                                          gen_math_ops.minimum(1., keep_probs))
        keep_probs = gen_array_ops.reshape(keep_probs, [-1] + [1] *
                                           (len(inputs.shape) - 1))
        all_keep_probs_are_one = math_ops.reduce_all(
            gen_math_ops.equal(keep_probs, 1.))

        # Computing noise shape
        noise_shape = nn_ops._get_noise_shape(inputs, noise_shape)  # pylint: disable=protected-access

        def get_dropout_mask():
            """ Computes the dropout mask and returns the inputs with the mask (and rescaling) applied """
            # random_tensor = uniform [keep_probs, 1.0 + keep_probs)
            # (assuming seeded_random returns values in [0, 1) - TODO confirm)
            # The first dimension of the noise shape is dropped from the shape given to seeded_random.
            random_tensor = keep_probs
            random_tensor += seeded_random(
                seeds,
                offset=offset if offset is not None else seeded_dropout.offset,
                shape=noise_shape[1:],
                dtype=inputs.dtype,
                seed=seed)

            # 0. if [keep_probs, 1.0) and 1. if [1.0, 1.0 + keep_prob)
            binary_tensor = gen_math_ops.floor(random_tensor)
            ret = math_ops.divide(inputs, keep_probs) * binary_tensor
            ret.set_shape(inputs.get_shape())

            # Setting control flow ops to avoid computing this function if not required
            with ops.control_dependencies([ret]):
                return array_ops.identity(ret)

        # Returning the inputs unchanged when every keep prob is 1, otherwise the masked inputs
        return control_flow_ops.cond(all_keep_probs_are_one,
                                     true_fn=lambda: inputs,
                                     false_fn=get_dropout_mask)