Exemple #1
0
    def sample(self, time, outputs, state, name=None):
        """ Builds the op returning the ids selected for the next time step.
            The branch is chosen in-graph by comparing `self._decoder_type` against the decoder constants:
                - TRAINING_DECODER (also the default branch): a vector of -1, one per row of `outputs`
                - GREEDY_DECODER: the int32 argmax of `outputs` over its last axis
                - SAMPLE_DECODER: a draw from a categorical distribution whose logits are `outputs`
                  (divided by `self._softmax_temperature` when it is not None)
            :param time: The current time step (only used as a name scope value here)
            :param outputs: The outputs of the current step, used as logits
            :param state: The current state (only used as a name scope value here)
            :param name: Optional name scope for the created ops
            :return: The result of the selected branch
        """
        with ops.name_scope(name, 'CustomHelperSample', [time, outputs, state]):

            def _teacher_forcing_ids():
                """ Teacher forcing - Nothing is sampled, every id is -1 """
                nb_rows = array_ops.shape(outputs)[0]
                minus_ones = gen_array_ops.fill([nb_rows], -1)
                with ops.control_dependencies([minus_ones]):
                    return array_ops.identity(minus_ones)

            def _greedy_ids():
                """ Greedy - Highest scoring id on the last axis """
                best_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
                with ops.control_dependencies([best_ids]):
                    return array_ops.identity(best_ids)

            def _sampled_ids():
                """ Sampling - Draws ids from the (optionally temperature-scaled) logits """
                if self._softmax_temperature is None:
                    logits = outputs
                else:
                    logits = outputs / self._softmax_temperature
                drawn_ids = categorical.Categorical(logits=logits).sample(seed=self._seed)
                with ops.control_dependencies([drawn_ids]):
                    return array_ops.identity(drawn_ids)

            # One (predicate, branch) pair per decoder type, in the order they are tested
            branch_by_decoder = ((TRAINING_DECODER, _teacher_forcing_ids),
                                 (GREEDY_DECODER, _greedy_ids),
                                 (SAMPLE_DECODER, _sampled_ids))
            pred_fn_pairs = [(gen_math_ops.equal(self._decoder_type, decoder_type), branch_fn)
                             for decoder_type, branch_fn in branch_by_decoder]
            return control_flow_ops.case(pred_fn_pairs, default=_teacher_forcing_ids)
Exemple #2
0
            def get_next_inputs():
                """ Builds the `CandidateInputs` fed to the decoder at the next time step.
                    The ids come from `self._input_tas` at `next_time` for the training decoder (and by default),
                    and from `sample_ids` for the greedy and sample decoders.
                    `next_time` and `sample_ids` are taken from the enclosing scope.
                """
                def _teacher_forced_ids():
                    """ Training - Reads the ids stored for the next time step """
                    stored_ids = self._input_tas.read(next_time)
                    with ops.control_dependencies([stored_ids]):
                        return array_ops.identity(stored_ids)

                def _decoded_ids():
                    """ Greedy / Sample - Re-uses the ids that were just selected """
                    return sample_ids

                branch_by_decoder = ((TRAINING_DECODER, _teacher_forced_ids),
                                     (GREEDY_DECODER, _decoded_ids),
                                     (SAMPLE_DECODER, _decoded_ids))
                pred_fn_pairs = [(gen_math_ops.equal(self._decoder_type, decoder_type), branch_fn)
                                 for decoder_type, branch_fn in branch_by_decoder]
                next_ids = control_flow_ops.case(pred_fn_pairs, default=_teacher_forced_ids)

                # Embedding the ids, then reading and embedding the candidates of the next step
                next_inputs_emb = self._input_layer(self._order_embedding_fn(next_ids))
                next_candidates = self._candidate_tas.read(next_time)
                next_candidates_emb = self._candidate_embedding_fn(next_candidates)

                # Returning identities created under control dependencies
                # (kept from the original, which does this to prevent the branch from executing eagerly)
                with ops.control_dependencies([next_inputs_emb, next_candidates, next_candidates_emb]):
                    return CandidateInputs(inputs=array_ops.identity(next_inputs_emb),
                                           candidates=array_ops.identity(next_candidates),
                                           candidates_emb=array_ops.identity(next_candidates_emb))
Exemple #3
0
    def initialize(self, name=None):
        """ Builds the initial `(finished, next_inputs)` pair of the helper.
            - `finished` marks the items whose `self._sequence_length` equals 0
            - `next_inputs` is `self._zero_inputs` when every item is finished, otherwise a `CandidateInputs`
              built from the first inputs and the candidates read at time 0
            :param name: Optional name scope for the created ops
            :return: A tuple `(finished, next_inputs)`
        """
        with ops.name_scope(name, 'CustomHelperInitialize'):
            finished = gen_math_ops.equal(0, self._sequence_length)
            all_finished = math_ops.reduce_all(finished)
            first_candidates = self._candidate_tas.read(0)

            def _embedded_first_target():
                """ Training - Embeds the first stored input """
                embedded = self._order_embedding_fn(self._input_tas.read(0))
                with ops.control_dependencies([embedded]):
                    return array_ops.identity(embedded)

            def _embedded_start_token():
                """ Greedy / Sample - Embeds `self._start_inputs` """
                embedded = self._order_embedding_fn(self._start_inputs)
                with ops.control_dependencies([embedded]):
                    return array_ops.identity(embedded)

            # Selecting the first inputs according to the decoder type (training is also the default)
            branch_by_decoder = ((TRAINING_DECODER, _embedded_first_target),
                                 (GREEDY_DECODER, _embedded_start_token),
                                 (SAMPLE_DECODER, _embedded_start_token))
            pred_fn_pairs = [(gen_math_ops.equal(self._decoder_type, decoder_type), branch_fn)
                             for decoder_type, branch_fn in branch_by_decoder]
            first_inputs = control_flow_ops.case(pred_fn_pairs, default=_embedded_first_target)

            def _first_step_inputs():
                """ Inputs used when at least one item is not finished """
                return CandidateInputs(inputs=self._input_layer(first_inputs),
                                       candidates=first_candidates,
                                       candidates_emb=self._candidate_embedding_fn(first_candidates))

            next_inputs = control_flow_ops.cond(all_finished,
                                                lambda: self._zero_inputs,
                                                _first_step_inputs)
            return finished, next_inputs
Exemple #4
0
    def call(self, inputs, state):  # pylint: disable=arguments-differ
        """ Perform a step of attention-wrapped RNN
            :param inputs: Tensor of rank 2, the input at this time step (its first dimension is used as batch size).
            :param state: An instance of `SelfAttentionWrapperState` containing tensors from the previous time step.
            :return: A tuple `(attention_or_cell_output, next_state)`, where:
                    - `attention_or_cell_output` is the attention if `self._output_attention` is set,
                       otherwise the cell output.
                    - `next_state` is an instance of `SelfAttentionWrapperState` containing the state calculated at
                       this time step (cell state, time + 1, and memory).
            :raises TypeError: if `state` is not an instance of `SelfAttentionWrapperState`.
        """
        if not isinstance(state, SelfAttentionWrapperState):
            # The message names the class that is actually checked
            raise TypeError(
                'Expected state to be instance of SelfAttentionWrapperState. Received type %s instead.'
                % type(state))

        # Getting batch size
        batch_size = array_ops.shape(inputs)[0]
        assert len(inputs.shape) == 2, 'Expected inputs to be of rank 2'

        def get_next_memory_and_attn():
            """ Time > 0: appends the transformed inputs to the memory and computes attention over it """
            next_memory = array_ops.concat(
                [
                    state.memory,  # [b, t, mem_size]
                    array_ops.expand_dims(self._input_fn(inputs), axis=1)
                ],
                axis=1)
            next_attention = self._compute_attention(inputs, next_memory)
            # Identities under control dependencies so the results belong to this cond branch
            with ops.control_dependencies([next_memory, next_attention]):
                return array_ops.identity(next_memory), array_ops.identity(
                    next_attention)

        def get_zero_memory_and_attn():
            """ Time = 0, we don't concatenate to memory and attention is all 0. """
            next_memory = state.memory
            next_attention = array_ops.zeros(
                [batch_size, self._attention_layer_size], dtype=inputs.dtype)
            with ops.control_dependencies([next_memory, next_attention]):
                return array_ops.identity(next_memory), array_ops.identity(
                    next_attention)

        # Computing memory and attention (the memory is left untouched on the very first step)
        memory, attention = control_flow_ops.cond(
            gen_math_ops.equal(state.time, 0),
            true_fn=get_zero_memory_and_attn,
            false_fn=get_next_memory_and_attn)

        # Calculate the true inputs to the cell based on the attention value computed above.
        cell_inputs = self._cell_input_fn(inputs, attention)
        cell_state = state.cell_state
        cell_output, cell_state = self._cell(cell_inputs, cell_state)

        # Building the state for the next time step
        next_state = SelfAttentionWrapperState(cell_state=cell_state,
                                               time=state.time + 1,
                                               memory=memory)

        # Returning cell output or attention
        if self._output_attention:
            return attention, next_state
        return cell_output, next_state
    def _take_sparse_grad(grad_accum, grad):
        """ Takes the gradient stored in a SparseConditionalAccumulator, or zeros if it is empty
            :param grad_accum: The gradient accumulator where gradients are stored
            :param grad: An instance of the gradient stored in the accumulator (used to build the zero gradient)
            :return: An IndexedSlices with the gradient taken from the accumulator when at least one gradient
                     has been accumulated, otherwise an IndexedSlices with zeroed-out values
            :type grad_accum: data_flow_ops.SparseConditionalAccumulator
        """
        def _averaged_slices():
            """ At least one gradient is stored - Takes it from the accumulator """
            taken = grad_accum.take_indexed_slices_grad(num_required=1)
            with ops.control_dependencies([taken]):
                values = array_ops.identity(taken.values)
                return ops.IndexedSlices(values=values,
                                         indices=taken.indices,
                                         dense_shape=taken.dense_shape)

        def _zero_slices():
            """ Nothing is stored - Zero values, with the indices and dense shape of `grad` cast to int64 """
            zeros = array_ops.zeros_like(grad.values)
            with ops.control_dependencies([zeros]):
                values = array_ops.identity(zeros)
                indices = math_ops.cast(grad.indices, dtypes.int64)
                dense_shape = math_ops.cast(grad.dense_shape, dtypes.int64)
                return ops.IndexedSlices(values=values, indices=indices, dense_shape=dense_shape)

        accumulator_is_empty = gen_math_ops.equal(grad_accum.num_accumulated(), 0)
        return control_flow_ops.cond(accumulator_is_empty,
                                     true_fn=_zero_slices,
                                     false_fn=_averaged_slices)
Exemple #6
0
            def get_next_inputs():
                """ Builds the `MaskedInputs` (embedded inputs and mask) fed to the decoder at the next time step.
                    The ids come from `self._input_tas` at `next_time` for the training decoder (and by default),
                    and from `sample_ids` for the greedy and sample decoders.
                    `next_time` and `sample_ids` are taken from the enclosing scope.
                """
                def _teacher_forced_ids():
                    """ Training - Reads the ids stored for the next time step """
                    stored_ids = self._input_tas.read(next_time)
                    with ops.control_dependencies([stored_ids]):
                        return array_ops.identity(stored_ids)

                def _decoded_ids():
                    """ Greedy / Sample - Re-uses the ids that were just selected """
                    return sample_ids

                branch_by_decoder = ((TRAINING_DECODER, _teacher_forced_ids),
                                     (GREEDY_DECODER, _decoded_ids),
                                     (SAMPLE_DECODER, _decoded_ids))
                pred_fn_pairs = [(gen_math_ops.equal(self._decoder_type, decoder_type), branch_fn)
                                 for decoder_type, branch_fn in branch_by_decoder]
                next_ids = control_flow_ops.case(pred_fn_pairs, default=_teacher_forced_ids)
                next_inputs_emb = self._input_layer(self._embedding_fn(next_ids))

                # Computing the mask of the next step from the selected ids
                # Shapes, as annotated in the original code:
                #   one_hot_ids:    (b, 1, VOC, 1)
                #   step_mask:      (b, 1, VOC, VOC)
                #   next_mask:      (b, VOC)        -- DenseTensor
                one_hot_ids = array_ops.one_hot(next_ids, self.vocab_size)
                one_hot_ids = one_hot_ids[:, None, :, None]
                step_mask = _slice_mask(self._mask,
                                        [-1, next_time, -1, -1],
                                        time_major=self._time_major)
                selected_mask = one_hot_ids * step_mask
                next_mask = sparse_ops.sparse_reduce_sum(selected_mask, axis=[1, 2])
                next_mask = gen_math_ops.minimum(next_mask, 1.)            # Capping the summed mask at 1
                next_mask.set_shape([None, self.vocab_size])

                # Returning identities created under control dependencies
                # (kept from the original, which does this to prevent the branch from executing eagerly)
                with ops.control_dependencies([next_inputs_emb, next_mask]):
                    return MaskedInputs(inputs=array_ops.identity(next_inputs_emb),
                                        mask=array_ops.identity(next_mask))
Exemple #7
0
    def sample(self, time, outputs, state, name=None):
        """ Builds the op returning the ids selected for the next time step.
            `outputs` is a tuple of (cell_outputs, candidate). The branch is chosen in-graph by comparing
            `self._decoder_type` against the decoder constants:
                - TRAINING_DECODER (also the default branch): a vector of -1, one per row of `cell_outputs`
                - GREEDY_DECODER: the entry of `candidate` at the argmax position of `cell_outputs`
                - SAMPLE_DECODER: the entry of `candidate` at a position drawn from a categorical distribution
                  whose logits are `cell_outputs` (divided by `self._softmax_temperature` when it is not None)
            :param time: The current time step (only used as a name scope value here)
            :param outputs: A tuple (cell_outputs, candidate)
            :param state: The current state (only used as a name scope value here)
            :param name: Optional name scope for the created ops
            :return: The result of the selected branch
        """
        cell_outputs, candidate = outputs

        with ops.name_scope(name, 'CustomHelperSample', [time, outputs, state]):

            def _candidate_at(positions):
                """ Maps positions to the matching entries of `candidate` with a one-hot selection """
                nb_candidate = array_ops.shape(candidate)[1]
                selector = array_ops.one_hot(positions, nb_candidate, dtype=dtypes.int32)
                selected = math_ops.reduce_sum(selector * candidate, axis=-1)
                with ops.control_dependencies([selected]):
                    return array_ops.identity(selected)

            def _teacher_forcing_ids():
                """ Teacher forcing - Nothing is sampled, every id is -1 """
                nb_rows = array_ops.shape(cell_outputs)[0]
                minus_ones = gen_array_ops.fill([nb_rows], -1)
                with ops.control_dependencies([minus_ones]):
                    return array_ops.identity(minus_ones)

            def _greedy_ids():
                """ Greedy - Candidate at the highest scoring position """
                best_positions = math_ops.argmax(cell_outputs, axis=-1)
                return _candidate_at(math_ops.cast(best_positions, dtypes.int32))

            def _sampled_ids():
                """ Sampling - Candidate at a position drawn from the (optionally scaled) logits """
                if self._softmax_temperature is None:
                    logits = cell_outputs
                else:
                    logits = cell_outputs / self._softmax_temperature
                drawn_positions = categorical.Categorical(logits=logits).sample(seed=self._seed)
                return _candidate_at(drawn_positions)

            # One (predicate, branch) pair per decoder type, in the order they are tested
            branch_by_decoder = ((TRAINING_DECODER, _teacher_forcing_ids),
                                 (GREEDY_DECODER, _greedy_ids),
                                 (SAMPLE_DECODER, _sampled_ids))
            pred_fn_pairs = [(gen_math_ops.equal(self._decoder_type, decoder_type), branch_fn)
                             for decoder_type, branch_fn in branch_by_decoder]
            return control_flow_ops.case(pred_fn_pairs, default=_teacher_forcing_ids)
Exemple #8
0
    def initialize(self):
        """ Initialize the beam helper - Called in beam_decoder.initialize()
            The start inputs are `self._zero_inputs` when every sequence length is 0, otherwise a `MaskedInputs`
            made of the embedded start tokens (passed through the input layer and split by batch / beams)
            and of `self._zero_mask`.
            :return: `(finished, start_inputs, initial_cell_state)`.
        """
        finished = self._finished
        zero_inputs = self._zero_inputs
        zero_mask = self._zero_mask
        sequence_is_empty = gen_math_ops.equal(0, self._sequence_length)
        all_finished = math_ops.reduce_all(sequence_is_empty)
        embedded_start_tokens = self._embedding_fn(self._start_tokens)

        def _finished_inputs():
            """ Every sequence is empty - Nothing to feed """
            return zero_inputs

        def _first_step_inputs():
            """ At least one sequence is not empty - Feeding the start tokens """
            return MaskedInputs(inputs=self._split_batch_beams(self._input_layer(embedded_start_tokens),
                                                               self._input_size),
                                mask=zero_mask)

        start_inputs = control_flow_ops.cond(all_finished, _finished_inputs, _first_step_inputs)
        return finished, start_inputs, self._initial_cell_state
Exemple #9
0
    def initialize(self):
        """ Initialize the beam helper - Called in beam_decoder.initialize()
            The start inputs are `self._zero_inputs` when every sequence length is 0, otherwise a
            `CandidateInputs` made of the embedded start tokens (passed through the input layer and split by
            batch / beams), the candidates read at time 0, and their embedding.
            :return: `(finished, start_inputs, initial_cell_state)`.
        """
        finished = self._finished
        zero_inputs = self._zero_inputs
        sequence_is_empty = gen_math_ops.equal(0, self._sequence_length)
        all_finished = math_ops.reduce_all(sequence_is_empty)
        embedded_start_tokens = self._order_embedding_fn(self._start_tokens)
        first_candidates = self._candidate_tas.read(0)

        def _finished_inputs():
            """ Every sequence is empty - Nothing to feed """
            return zero_inputs

        def _first_step_inputs():
            """ At least one sequence is not empty - Feeding the start tokens and first candidates """
            return CandidateInputs(inputs=self._split_batch_beams(self._input_layer(embedded_start_tokens),
                                                                  self._input_size),
                                   candidates=first_candidates,
                                   candidates_emb=self._candidate_embedding_fn(first_candidates))

        start_inputs = control_flow_ops.cond(all_finished, _finished_inputs, _first_step_inputs)
        return finished, start_inputs, self._initial_cell_state
    def _take_dense_grad(grad_accum, grad):
        """ Takes the gradient stored in a ConditionalAccumulator, or zeros if it is empty
            :param grad_accum: The gradient accumulator where gradients are stored
            :param grad: An instance of the gradient stored in the accumulator (used to build the zero gradient)
            :return: The gradient taken from the accumulator when at least one gradient has been accumulated,
                     otherwise a zero tensor shaped like `grad`
            :type grad_accum: data_flow_ops.ConditionalAccumulator
        """
        def _averaged_grad():
            """ At least one gradient is stored - Takes it from the accumulator """
            taken = grad_accum.take_grad(num_required=1)
            with ops.control_dependencies([taken]):
                return array_ops.identity(taken)

        def _zeros_like_grad():
            """ Nothing is stored - Zeros with the same shape and type as `grad` """
            zeros = array_ops.zeros_like(grad)
            with ops.control_dependencies([zeros]):
                return array_ops.identity(zeros)

        accumulator_is_empty = gen_math_ops.equal(grad_accum.num_accumulated(), 0)
        return control_flow_ops.cond(accumulator_is_empty,
                                     true_fn=_zeros_like_grad,
                                     false_fn=_averaged_grad)
Exemple #11
0
def seeded_dropout(inputs,
                   seeds,
                   keep_probs,
                   offset=None,
                   noise_shape=None,
                   seed=None,
                   name=None):
    """ Applies dropout to `inputs` using a deterministic, per-item mask.
        The mask is derived from `seeds` (one seed per item in the batch) through `seeded_random`.

        Kept elements are scaled up by `1 / keep_probs` and dropped elements are set to `0`, so that the
        expected sum is unchanged.

        By default, each element is kept or dropped independently. If `noise_shape` is specified, it must be
        broadcastable to the shape of `inputs`, and only dimensions with `noise_shape[i] == shape(inputs)[i]`
        will make independent decisions (e.g. with `shape(inputs) = [k, l, m, n]` and
        `noise_shape = [k, 1, 1, n]`, each batch and channel component is kept independently and each row and
        column is kept or not kept together).

        `inputs` is returned untouched when it has an empty shape (scalar) or an integer dtype, and when
        `keep_probs` is the Python float 1.

        When `offset` is None, the function-level counter `seeded_dropout.offset` is advanced on every call
        (even on the early returns) and used as the offset.

        :param inputs: A floating point tensor.
        :param seeds: A tensor representing the seed for each item in the batch. (Size: (batch,))
        :param keep_probs: A scalar or vector of size (batch,). The probability that each element is kept.
        :param offset: Integer. Alternative offset to apply to compute the deterministic mask (e.g. in a loop).
        :param noise_shape: A 1-D `Tensor` of type `int32`, represents the shape for randomly generated keep/drop flags.
        :param seed: A Python integer. Used to create a default seed for the operation.
        :param name: A name for this operation (optional).
        :return: A Tensor of the same shape as `inputs`.
        :raises ValueError: if `inputs` is neither integer nor floating point, if a Python float `keep_probs`
                            is outside (0, 1], or if called in eager mode.
    """
    # Without an explicit offset, every call moves the shared counter forward
    if offset is None:
        seeded_dropout.offset += 40555607

    # Scalars (likely the 'time' attribute of a state) and integer tensors are never masked
    is_scalar = not inputs.shape
    if is_scalar or inputs.dtype.is_integer:
        return inputs

    with ops.name_scope(name, 'seeded_dropout', [inputs]):
        inputs = ops.convert_to_tensor(inputs, name='x')
        if not inputs.dtype.is_floating:
            raise ValueError(
                'Expected a floating point tensor. Got a %s tensor instead.' %
                inputs.dtype)

        # Python floats can be validated (and short-circuited) while building the graph
        if isinstance(keep_probs, float):
            if not 0 < keep_probs <= 1:
                raise ValueError(
                    'keep_probs must be a scalar tensor or a float in the range (0, 1], got %g'
                    % keep_probs)
            if keep_probs == 1:
                return inputs

        # Not supported in eager mode
        if context.executing_eagerly():
            raise ValueError('This function is not supported in eager mode.')

        # Clipping the probabilities to [0, 1] and reshaping them to [-1, 1, ..., 1] (same rank as inputs)
        keep_probs = ops.convert_to_tensor(keep_probs,
                                           dtype=inputs.dtype,
                                           name='keep_probs')
        clipped_above = gen_math_ops.minimum(1., keep_probs)
        keep_probs = gen_math_ops.maximum(0., clipped_above)
        trailing_ones = [1] * (len(inputs.shape) - 1)
        keep_probs = gen_array_ops.reshape(keep_probs, [-1] + trailing_ones)
        nothing_to_drop = math_ops.reduce_all(gen_math_ops.equal(keep_probs, 1.))

        # Computing noise shape
        noise_shape = nn_ops._get_noise_shape(inputs, noise_shape)  # pylint: disable=protected-access

        def _masked_inputs():
            """ Builds the keep / drop mask and applies it to the inputs """
            if offset is not None:
                mask_offset = offset
            else:
                mask_offset = seeded_dropout.offset

            # keep_probs + seeded_random(...), then floored:
            #   0. when the sum is in [keep_probs, 1.0) and 1. when it is in [1.0, 1.0 + keep_probs)
            shifted_noise = keep_probs + seeded_random(seeds,
                                                       offset=mask_offset,
                                                       shape=noise_shape[1:],
                                                       dtype=inputs.dtype,
                                                       seed=seed)
            keep_mask = gen_math_ops.floor(shifted_noise)
            dropped = math_ops.divide(inputs, keep_probs) * keep_mask
            dropped.set_shape(inputs.get_shape())

            # Identity under control dependencies to avoid computing this branch if not required
            with ops.control_dependencies([dropped]):
                return array_ops.identity(dropped)

        # The mask is only applied when at least one probability differs from 1
        return control_flow_ops.cond(nothing_to_drop,
                                     true_fn=lambda: inputs,
                                     false_fn=_masked_inputs)