コード例 #1
0
    def _update_distributed_as_chief(self, version_step=None):
        """ Builds the synchronized update operation that is run by the chief.
            :param version_step: Optional. A variable that represents the model's version (incremented by 1)
            :return: The grouped update operation to run

            The operation is built so that:
                1) the gradients are applied (`self._apply_grad_op`),
                2) `self._num_workers` tokens holding the global step are pushed on a shared queue,
                3) one token is popped from the queue and written to the local step.
        """
        with ops.device(self._global_step.device), ops.name_scope(''):
            # Shared queue holding the sync tokens - Placed on the same device as the global step
            token_queue = data_flow_ops.FIFOQueue(capacity=-1,
                                                  dtypes=self._global_step.dtype.base_dtype,
                                                  shapes=(),
                                                  name='sync_token_q',
                                                  shared_name='sync_token_q')
            self._sync_token_queue = token_queue

            # The tokens are only enqueued once the gradients have been applied
            with ops.control_dependencies([self._apply_grad_op]):
                token_batch = gen_array_ops.fill([self._num_workers], self._global_step)
                enqueue_op = token_queue.enqueue_many((token_batch, ))

                # Sync point - The token is only dequeued after the tokens have been enqueued
                with ops.control_dependencies([enqueue_op]):
                    next_token = token_queue.dequeue()
                    local_step_update = state_ops.assign(self._local_step, next_token)

                    # No version to bump - Only the local step is updated
                    if version_step is None:
                        return control_flow_ops.group(local_step_update)

                    # Otherwise, also increasing the version step
                    version_update = state_ops.assign_add(version_step, 1)
                    return control_flow_ops.group(local_step_update, version_update)
コード例 #2
0
    def __init__(self, cell, order_embedding, candidate_embedding, candidates, sequence_length, initial_state,
                 beam_width, input_layer=None, output_layer=None, time_major=False):
        """ Builds the CustomBeamHelper and pre-computes its initial state, finished flags and zero inputs
            :param cell: An `RNNCell` instance.
            :param order_embedding: The order embedding vector  - Size: (batch, ord_emb_size)
            :param candidate_embedding: The candidate embedding vector - Size: (batch, cand_emb_size)
            :param candidates: The candidates at each time step -- Size: (batch, nb_cand, max_candidates)
            :param sequence_length: The length of each sequence (batch,)
            :param initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
            :param beam_width: Python integer, the number of beams.
            :param input_layer: Optional. A layer to apply on the inputs (identity when not provided)
            :param output_layer: Optional. An instance of `tf.layers.Layer`, i.e., `tf.layers.Dense`. Optional layer
                                 to apply to the RNN output prior to storing the result or sampling.
            :param time_major: If true indicates that the first dimension is time, otherwise it is batch size.
            :raises ValueError: If `sequence_length` is not a vector (rank 1).
        """
        # pylint: disable=super-init-not-called,too-many-arguments
        rnn_cell_impl.assert_like_rnncell('cell', cell)                                                                 # pylint: disable=protected-access
        assert isinstance(beam_width, int), 'beam_width should be a Python integer'

        # The sequence lengths must be a vector
        seq_length = ops.convert_to_tensor(sequence_length, name='sequence_length')
        self._sequence_length = seq_length
        if seq_length.get_shape().ndims != 1:
            raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length.get_shape())

        # The candidates are transposed with _transpose_batch_time when they are not already time major
        candidates = ops.convert_to_tensor(candidates, name='candidates')
        if not time_major:
            candidates = nest.map_structure(_transpose_batch_time, candidates)

        self._cell = cell
        self._order_embedding_fn = _get_embedding_fn(order_embedding)
        self._candidate_embedding_fn = _get_embedding_fn(candidate_embedding)
        self._candidate_tas = nest.map_structure(_unstack_ta, candidates)
        self._output_layer = output_layer

        # Without an input layer, the inputs are passed as-is and keep the size of the order embedding
        if input_layer is None:
            self._input_layer = lambda x: x
            self._input_size = order_embedding.shape[-1]
        else:
            self._input_layer = input_layer
            self._input_size = input_layer.compute_output_shape([None, order_embedding.shape[-1]])[-1]

        self._batch_size = array_ops.size(sequence_length)
        self._start_tokens = gen_array_ops.fill([self._batch_size * beam_width], GO_ID)
        self._end_token = -1
        self._beam_width = beam_width
        self._initial_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                                      initial_state,
                                                      self._cell.state_size)

        # one_hot on index 0 with inverted on/off values: only the first beam of each batch item is not finished
        first_beam_ix = array_ops.zeros([self._batch_size], dtype=dtypes.int32)
        self._finished = array_ops.one_hot(first_beam_ix,
                                           depth=self._beam_width,
                                           on_value=False,
                                           off_value=True,
                                           dtype=dtypes.bool)

        # Zero inputs - Built from the start tokens and from the candidates of the first time step
        start_inputs = self._input_layer(self._order_embedding_fn(self._start_tokens))
        zero_beam_inputs = array_ops.zeros_like(self._split_batch_beams(start_inputs, self._input_size))
        zero_candidates = array_ops.zeros_like(candidates[0, :])
        zero_candidates_emb = array_ops.zeros_like(self._candidate_embedding_fn(candidates[0, :]))
        self._zero_inputs = CandidateInputs(inputs=zero_beam_inputs,
                                            candidates=zero_candidates,
                                            candidates_emb=zero_candidates_emb)
コード例 #3
0
    def __init__(self, decoder_type, inputs, order_embedding, candidate_embedding, sequence_length, candidates,
                 input_layer=None, time_major=False, softmax_temperature=None, seed=None, name=None):
        """ Builds the helper and pre-computes its start inputs and zero inputs
            :param decoder_type: An uint8 representing TRAINING_DECODER, GREEDY_DECODER, or SAMPLE_DECODER
            :param inputs: The decoder input (b, dec_len)
            :param order_embedding: The order embedding vector
            :param candidate_embedding: The candidate embedding vector
            :param sequence_length: The length of each input (b,)
            :param candidates: The candidates at each time step -- Size: (b, nb_cand, max_candidates)
            :param input_layer: Optional. A layer to apply on the inputs (identity when not provided)
            :param time_major: If true indicates that the first dimension is time, otherwise it is batch size
            :param softmax_temperature: Optional. Softmax temperature. None, scalar, or size: (batch_size,)
            :param seed: Optional. The sampling seed
            :param name: Optional scope name.
            :raises ValueError: If `sequence_length` is not a vector (rank 1).
        """
        # pylint: disable=too-many-arguments
        with ops.name_scope(name, "CustomHelper", [inputs, sequence_length, order_embedding, candidate_embedding]):
            inputs = ops.convert_to_tensor(inputs, name="inputs")
            candidates = ops.convert_to_tensor(candidates, name="candidates")

            # Storing the inputs as received (i.e. before any transposition)
            self._inputs = inputs
            self._order_embedding_fn = _get_embedding_fn(order_embedding)
            self._candidate_embedding_fn = _get_embedding_fn(candidate_embedding)

            # Transposing with _transpose_batch_time when not already time major, then unstacking with _unstack_ta
            if not time_major:
                inputs = nest.map_structure(_transpose_batch_time, inputs)
                candidates = nest.map_structure(_transpose_batch_time, candidates)
            self._input_tas = nest.map_structure(_unstack_ta, inputs)
            self._candidate_tas = nest.map_structure(_unstack_ta, candidates)
            self._decoder_type = decoder_type

            # The sequence lengths must be a vector
            seq_length = ops.convert_to_tensor(sequence_length, name="sequence_length")
            self._sequence_length = seq_length
            if seq_length.get_shape().ndims != 1:
                raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length.get_shape())

            if input_layer is None:
                self._input_layer = lambda x: x
            else:
                self._input_layer = input_layer
            self._batch_size = array_ops.size(sequence_length)
            self._start_inputs = gen_array_ops.fill([self._batch_size], GO_ID)
            self._seed = seed

            # Zero inputs - Built from the start inputs and from the candidates of the first time step
            zero_inputs = array_ops.zeros_like(self._input_layer(self._order_embedding_fn(self._start_inputs)))
            zero_candidates = array_ops.zeros_like(candidates[0, :])
            zero_candidates_emb = array_ops.zeros_like(self._candidate_embedding_fn(candidates[0, :]))
            self._zero_inputs = CandidateInputs(inputs=zero_inputs,
                                                candidates=zero_candidates,
                                                candidates_emb=zero_candidates_emb)

            # Softmax temperature
            # Clipping at 1e-10 to prevent a division by zero
            # A vector of temperatures gets an extra dim, so it can broadcast with the outputs shape
            if softmax_temperature is None:
                self._softmax_temperature = None
            else:
                temperature = gen_math_ops.maximum(1e-10, softmax_temperature)
                if temperature.get_shape().ndims == 1:
                    temperature = temperature[:, None]
                self._softmax_temperature = temperature
コード例 #4
0
        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """ Internal while_loop body - Runs one decoder step and records its outputs
                :param time: The current time step
                :param outputs_ta: The (possibly nested) TensorArray(s) where the outputs are written
                :param state: The (possibly nested) decoder state before this step
                :param inputs: The (possibly nested) inputs of this step
                :param finished: Boolean tensor indicating which entries were finished before this step
                :param sequence_lengths: The sequence lengths recorded so far
                :return: A tuple (time + 1, outputs_ta, next_state, next_inputs, next_finished, next_sequence_lengths)
            """
            step_outputs, step_state, step_inputs, step_finished = decoder.step(time, inputs, state)

            # A decoder that tracks its own finished flags overrides the flags of the previous step
            next_finished = (step_finished if decoder.tracks_own_finished
                             else gen_math_ops.logical_or(step_finished, finished))

            # Entries that were not finished before this step now have a length of time + 1
            still_running = gen_math_ops.logical_not(finished)
            lengths_at_this_step = gen_array_ops.fill(array_ops.shape(sequence_lengths), time + 1)
            next_sequence_lengths = array_ops.where(still_running, lengths_at_this_step, sequence_lengths)

            nest.assert_same_structure(state, step_state)
            nest.assert_same_structure(outputs_ta, step_outputs)
            nest.assert_same_structure(inputs, step_inputs)

            def _zero_if_finished(out, zero):
                """ Replaces the output of the finished entries with zeros """
                return array_ops.where(finished, zero, out)

            def _copy_through_if_finished(new, cur):
                """ Keeps the current state for the finished entries, otherwise uses the new state """
                # TensorArrays, multiple dynamic dims, and scalar states get passed through.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    return new
                if None in new.shape.as_list()[1:]:
                    return new
                new.set_shape(cur.shape)
                if new.shape.ndims == 0:
                    return new
                return array_ops.where(finished, cur, new)

            # Zeroing the outputs and copying through the states past finish (only when imputing)
            if impute_finished:
                emit = nest.map_structure(_zero_if_finished, step_outputs, zero_outputs)
                next_state = nest.map_structure(_copy_through_if_finished, step_state, state)
            else:
                emit = step_outputs
                next_state = step_state

            def _write(tensor_array, out):
                """ Writes the output at the current time step """
                return tensor_array.write(time, out)

            outputs_ta = nest.map_structure(_write, outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, step_inputs,
                    next_finished, next_sequence_lengths)
コード例 #5
0
    def __init__(self,
                 cell,
                 embedding,
                 mask,
                 sequence_length,
                 initial_state,
                 beam_width,
                 input_layer=None,
                 output_layer=None,
                 time_major=False):
        """ Builds the CustomBeamHelper and pre-computes its initial state, finished flags and zero inputs
            :param cell: An `RNNCell` instance.
            :param embedding: The embedding vector
            :param mask: [SparseTensor] Mask to apply at each time step -- Size: (b, dec_len, vocab_size, vocab_size)
            :param sequence_length: The length of each input (b,)
            :param initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
            :param beam_width: Python integer, the number of beams.
            :param input_layer: Optional. A layer to apply on the inputs (identity when not provided)
            :param output_layer: Optional. An instance of `tf.layers.Layer`, i.e., `tf.layers.Dense`. Optional layer
                                 to apply to the RNN output prior to storing the result or sampling.
            :param time_major: If true indicates that the first dimension is time, otherwise it is batch size.
            :raises ValueError: If `sequence_length` is not a vector (rank 1).
        """
        # pylint: disable=super-init-not-called,too-many-arguments
        rnn_cell_impl.assert_like_rnncell('cell', cell)  # pylint: disable=protected-access
        assert isinstance(mask, SparseTensor), 'The mask must be a SparseTensor'
        assert isinstance(beam_width, int), 'beam_width should be a Python integer'

        # The sequence lengths must be a vector
        seq_length = ops.convert_to_tensor(sequence_length, name='sequence_length')
        self._sequence_length = seq_length
        if seq_length.get_shape().ndims != 1:
            raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length.get_shape())

        self._cell = cell
        self._embedding_fn = _get_embedding_fn(embedding)
        self._mask = mask
        self._time_major = time_major
        self.vocab_size = VOCABULARY_SIZE
        self._output_layer = output_layer

        # Without an input layer, the inputs are passed as-is and keep the size of the embedding
        if input_layer is None:
            self._input_layer = lambda x: x
            self._input_size = embedding.shape[-1]
        else:
            self._input_layer = input_layer
            self._input_size = input_layer.compute_output_shape([None, embedding.shape[-1]])[-1]

        self._batch_size = array_ops.size(sequence_length)
        self._start_tokens = gen_array_ops.fill([self._batch_size * beam_width], GO_ID)
        self._end_token = -1
        self._beam_width = beam_width
        self._initial_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                                      initial_state,
                                                      self._cell.state_size)

        # one_hot on index 0 with inverted on/off values: only the first beam of each batch item is not finished
        first_beam_ix = array_ops.zeros([self._batch_size], dtype=dtypes.int32)
        self._finished = array_ops.one_hot(first_beam_ix,
                                           depth=self._beam_width,
                                           on_value=False,
                                           off_value=True,
                                           dtype=dtypes.bool)

        # Slicing the mask at time 0 for the GO_ID token, then repeating it for every beam
        # zero_mask is (batch, beam, vocab_size)
        start_mask = _slice_mask(self._mask,
                                 slicing=[-1, 0, GO_ID, -1],
                                 squeeze=True,
                                 time_major=self._time_major)
        self._zero_mask = gen_array_ops.tile(array_ops.expand_dims(start_mask, axis=1),
                                             [1, self._beam_width, 1])

        # Zero inputs - Built from the start tokens
        start_inputs = self._input_layer(self._embedding_fn(self._start_tokens))
        zero_beam_inputs = array_ops.zeros_like(self._split_batch_beams(start_inputs, self._input_size))
        self._zero_inputs = MaskedInputs(inputs=zero_beam_inputs, mask=self._zero_mask)
コード例 #6
0
 def training():
     """ Training / teacher forcing selection - Returns a vector of -1 (one value per row of `outputs`) """
     nb_rows = array_ops.shape(outputs)[0]
     minus_ones = gen_array_ops.fill([nb_rows], -1)
     with ops.control_dependencies([minus_ones]):
         return array_ops.identity(minus_ones)
コード例 #7
0
    def __init__(self,
                 decoder_type,
                 inputs,
                 embedding,
                 sequence_length,
                 mask,
                 input_layer=None,
                 time_major=False,
                 softmax_temperature=None,
                 seed=None,
                 name=None):
        """ Builds the helper and pre-computes its start inputs and zero inputs
            :param decoder_type: An uint8 representing TRAINING_DECODER, GREEDY_DECODER, or SAMPLE_DECODER
            :param inputs: The decoder input (b, dec_len)
            :param embedding: The embedding vector, or a callable returning the embedding of the given ids
            :param sequence_length: The length of each input (b,)
            :param mask: [SparseTensor] Mask to apply at each time step -- Size: (b, dec_len, vocab_size, vocab_size)
            :param input_layer: Optional. A callable layer to apply on the inputs (identity when not callable)
            :param time_major: If true indicates that the first dimension is time, otherwise it is batch size
            :param softmax_temperature: Optional. Softmax temperature. None or size: (batch_size,)
            :param seed: Optional. The sampling seed
            :param name: Optional scope name.
            :raises ValueError: If `sequence_length` is not a vector (rank 1).
        """
        # pylint: disable=too-many-arguments
        with ops.name_scope(name, "CustomHelper", [inputs, sequence_length, embedding]):
            assert isinstance(mask, SparseTensor), 'The mask must be a SparseTensor'
            inputs = ops.convert_to_tensor(inputs, name="inputs")

            # Storing the inputs as received (i.e. before any transposition)
            self._inputs = inputs
            self._mask = mask
            self._time_major = time_major

            # A callable embedding is used directly, otherwise the ids are looked up in the embedding
            if callable(embedding):
                self._embedding_fn = embedding
            else:
                self._embedding_fn = lambda ids: embedding_lookup(embedding, ids)

            # Transposing with _transpose_batch_time when not already time major, then unstacking with _unstack_ta
            if not time_major:
                inputs = nest.map_structure(_transpose_batch_time, inputs)
            self._input_tas = nest.map_structure(_unstack_ta, inputs)
            self._decoder_type = decoder_type

            # The sequence lengths must be a vector
            seq_length = ops.convert_to_tensor(sequence_length, name="sequence_length")
            self._sequence_length = seq_length
            if seq_length.get_shape().ndims != 1:
                raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length.get_shape())

            # Anything that is not callable (including None) is replaced by the identity
            if callable(input_layer):
                self._input_layer = input_layer
            else:
                self._input_layer = lambda x: x
            self._batch_size = array_ops.size(sequence_length)
            self._start_inputs = gen_array_ops.fill([self._batch_size], GO_ID)
            self._seed = seed
            self.vocab_size = VOCABULARY_SIZE

            # Zero inputs - Built from the start inputs and from the mask at time 0 for the GO_ID token
            zero_inputs = array_ops.zeros_like(self._input_layer(self._embedding_fn(self._start_inputs)))
            start_mask = _slice_mask(self._mask,
                                     slicing=[-1, 0, GO_ID, -1],
                                     squeeze=True,
                                     time_major=self._time_major)
            self._zero_inputs = MaskedInputs(inputs=zero_inputs, mask=start_mask)

            # Softmax temperature
            # Clipping at 1e-10 to prevent a division by zero
            # A vector of temperatures gets an extra dim, so it can broadcast with the outputs shape
            if softmax_temperature is None:
                self._softmax_temperature = None
            else:
                temperature = gen_math_ops.maximum(1e-10, softmax_temperature)
                if temperature.get_shape().ndims == 1:
                    temperature = temperature[:, None]
                self._softmax_temperature = temperature
コード例 #8
0
ファイル: transformer.py プロジェクト: zhanpengfang/research
    def _step(self, inputs, past_attns, time, feeder_cell, feeder_state):
        """ Runs the inputs through the n layers (blocks) of the transformer
            :param inputs: The tensor inputs (embedding of each word) - [batch, seq_len, emb_size]
            :param past_attns: The past attentions - [batch, nb_layers, 2, nb_heads, past_length, emb_size // nb_heads]
            :param time: A tensor representing the current time step
            :param feeder_cell: None or A feeder cell that returns a RNN cell output to use for conditioning
            :param feeder_state: None or the initial state of the feeder cell
            :return: A tuple consisting of:
                        1) The cell outputs - [batch, seq_len, emb_size]
                        2) The present attention - [batch, nb_layers, 2, nb_heads, seq_len, emb_size // nb_heads]
                        3) The new state of the feeder cell
        """
        with variable_scope.variable_scope(self._scope, default_name='step'):
            nb_past_steps = array_ops.shape(past_attns)[-2]         # How many past attention steps we have
            seq_len = array_ops.shape(inputs)[-2]                   # How many steps are we computing for the current time
            emb_size = inputs.shape[-1].value                       # The size of the embedding
            assert emb_size == self._emb_size, 'Expected an embedding size of %d' % self._emb_size

            # 1) The inputs are already the word embedding of each token
            assert inputs.shape.ndims == 3, 'Expected [batch, seq_len, emb_size]'  # [bz, seq, emb]

            # 2) Computing the position embedding of each token
            # If we know the context was padded, the effective past length is the context length + nb of time steps
            if self._past_seq_lengths is None:
                past_length = gen_array_ops.fill([self._batch_size, 1], value=nb_past_steps)        # [bz, 1]
            else:
                effective_length = gen_math_ops.minimum(nb_past_steps, self._past_seq_lengths + time)
                past_length = effective_length[:, None]                                             # [bz, 1]
            step_ix = math_ops.range(seq_len)[None, :]                                              # [1, seq_len]
            positions = gen_math_ops.add(past_length, step_ix)                                      # [batch, seq_len]

            # Positions are clipped to the last available position embedding
            positions = gen_math_ops.minimum(self._position_emb_size - 1, positions)                # [batch, seq_len]
            h_pos = self._position_embedding_fn(positions)                                          # [bz, seq, emb]
            out_h = inputs + h_pos

            # 3) If we have a feeder cell, we also need to condition 'h' on it.
            next_feeder_state = feeder_state
            if feeder_cell is not None:
                assert feeder_state is not None, 'A feeder state is required if a feeder cell is provided.'
                assert inputs.shape[1].value == 1, 'The seq dimension must be 1 to use a feeder_cell'
                feeder_inputs = array_ops.squeeze(inputs, axis=1)
                feeder_outputs, next_feeder_state = feeder_cell(feeder_inputs, feeder_state)
                h_feed = feeder_outputs                                                             # [bz, feeder_sz]

                # Projecting the feeder output when its size does not match the embedding size
                if feeder_outputs.shape[-1].value != emb_size:
                    projection = core.Dense(emb_size, activation=None, name='h_feed')
                    h_feed = projection(h_feed)                                                     # [bz, emb]
                h_feed = gen_array_ops.tile(h_feed[:, None, :], [1, seq_len, 1])                    # [bz, seq, emb]
                out_h = out_h + h_feed

            # Transformer - One past attention per layer
            layer_pasts = array_ops.unstack(past_attns, axis=1)     # list of [batch, 2, heads, past_len, head_sz]
            assert len(layer_pasts) == self._nb_layers, \
                'Expected the past attention to have %d layers.' % self._nb_layers

            layer_presents = []
            for layer_ix, layer_past in enumerate(layer_pasts):
                out_h, layer_present = self._block(out_h, layer_past, 'layer.%d' % layer_ix)
                layer_presents.append(layer_present)
            presents = array_ops.stack(layer_presents, axis=1)

            # Normalizing and returning
            cell_outputs = self._norm(out_h, 'norm_h')              # [batch, seq, emb]
            return cell_outputs, presents, next_feeder_state