Example #1
    def FProp(self, theta, prepared_inputs, step_inputs, padding, state0):
        """Looks up a list of embeddings from an EmbeddingLayer.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      prepared_inputs: unused.
      step_inputs: A NestedMap containing a list called inputs. This list should
        contain a single integer tensor of shape [batch], where each integer
        represents an index into the embedding table. (By convention, all Steps
        that can be used with StackStep must store inputs in
        step_inputs.inputs[], but in this step it does not make sense for that
        list to have more than one tensor in it).
      padding: unused.
      state0: unused.

    Returns:
      A tuple (output, state1): output is a NestedMap containing a params.dtype
      tensor of shape [batch, embedding_dim] under the key 'output'; state1 is
      an empty NestedMap.
    """
        del prepared_inputs
        del state0
        assert len(step_inputs.inputs) == 1

        output = self.emb.EmbLookup(theta.emb, step_inputs.inputs[0])
        return py_utils.NestedMap(output=output), py_utils.NestedMap()
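Most of these examples revolve around py_utils.NestedMap: a dict subclass with attribute-style access whose Transform method maps a function over every leaf value. A minimal sketch of that behavior, assuming a working Lingvo install (the tensors and field names here are made up):

import tensorflow as tf
from lingvo.core import py_utils

batch = py_utils.NestedMap()
batch.src = py_utils.NestedMap(ids=tf.zeros([4, 10], tf.int32))
batch.src.paddings = tf.zeros([4, 10])

# Transform visits every leaf, like the _Cast helper in Example #5 below.
widened = batch.Transform(lambda t: tf.cast(t, tf.float64))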
Example #2
    def FPropMeta(cls, p, *args):
        py_utils.CheckShapes(args)
        total = 0

        graph_tensors = GraphTensors()
        assert len(p.input_endpoints) == len(args)
        for n, t in zip(p.input_endpoints, args):
            graph_tensors.StoreTensor(n, t)

        ch_out = None
        for signature, sub in p.sub:
            sig = GraphSignature(signature)
            template = py_utils.NestedMap(inputs=sig.inputs)
            packed = template.Transform(graph_tensors.GetTensor)
            input_args = packed.inputs

            meta = sub.cls.FPropMeta(sub, *input_args)
            total += meta.flops
            ch_out = meta.out_shapes
            assert len(ch_out) == len(sig.outputs)
            for n, t in zip(sig.outputs, ch_out):
                graph_tensors.StoreTensor(n, t)

        layer_out = tuple(
            graph_tensors.GetTensor(x) for x in p.output_endpoints)
        return py_utils.NestedMap(flops=total, out_shapes=layer_out)
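The bookkeeping pattern above, i.e. summing child flops while threading each child's out_shapes into the next child's inputs, can be sketched standalone; meta_chain below is a hypothetical helper, not part of Lingvo:

def meta_chain(child_meta_fns, in_shapes):
    """Chains FPropMeta-style children: sums flops, threads shapes."""
    total, shapes = 0, in_shapes
    for meta_fn in child_meta_fns:
        flops, shapes = meta_fn(*shapes)
        total += flops
    return total, shapes

# Two children that each cost 10 flops and keep shapes unchanged:
total, out = meta_chain([lambda *s: (10, s)] * 2, ((4, 8),))
assert total == 20 and out == ((4, 8),)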
Example #3
 def Step(recurrent_theta, state0, inputs):
     """Computes one decoder step."""
     del inputs
     with tf.name_scope('single_sampler_step'):
         # Compute logits and states.
         bs_result, bs_state1 = pre_step_callback(
             recurrent_theta.theta,
             recurrent_theta.encoder_outputs,
             tf.expand_dims(state0.ids, 1),  # [batch, 1].
             state0.bs_state,
             num_hyps_per_beam=1)
         batch = tf.shape(bs_result.log_probs)[0]
         state1 = py_utils.NestedMap(timestep=state0.timestep + 1)
         state1.logits = bs_result.log_probs
         # Sample ids from logits. [batch].
         state1.ids = tf.reshape(
             tf.random.stateless_categorical(
                 state1.logits / p.temperature,
                 num_samples=1,
                 seed=tf.stack(
                     [recurrent_theta.random_seed, state0.timestep]),
                 dtype=state0.ids.dtype,
                 name='sample_next_id'), [batch])
         if 'is_last_chunk' in bs_result and p.target_eoc_id >= 0:
             state1.ids = tf.where(
                 tf.math.logical_and(
                     bs_result.is_last_chunk,
                     tf.equal(state1.ids, p.target_eoc_id)),
                 tf.fill(tf.shape(state1.ids), p.target_eos_id),
                 state1.ids)
         state1.bs_state = post_step_callback(
             recurrent_theta.theta, recurrent_theta.encoder_outputs,
             state1.ids, bs_state1)
     return state1, py_utils.NestedMap()
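The core of the step above is temperature sampling with a stateless op, so the same (seed, timestep) pair always yields the same sample. A standalone sketch (the vocabulary and seed values are made up):

import tensorflow as tf

logits = tf.math.log([[0.70, 0.20, 0.10]])  # [batch=1, vocab=3]
temperature = 0.8
ids = tf.random.stateless_categorical(
    logits / temperature, num_samples=1, seed=tf.stack([1234, 0]))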
Example #4
  def _ProcessSingleInput(self, source_id, src, tgt):
    """Performs strings-to-ids on the given input pair via p.tokenizer_dict."""
    _, src_labels, src_paddings = self.StringsToIds(
        tf.reshape(src, [1]), is_source=True, key=self._src_tokenizer_key)
    tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(
        tf.reshape(tgt, [1]), is_source=False, key=self._tgt_tokenizer_key)
    # Mask positions to 0 where padding is 1, for consistency, because the
    # tokenizer implementation may use the EOS token to pad.
    src_labels = py_utils.ApplyPadding(src_paddings, src_labels)
    tgt_ids = py_utils.ApplyPadding(tgt_paddings, tgt_ids)
    tgt_labels = py_utils.ApplyPadding(tgt_paddings, tgt_labels)

    features = py_utils.NestedMap()
    features.src = py_utils.NestedMap()
    features.src.ids = src_labels
    # ids_indicator is 1 if and only if the output from the tokenizer has a
    # non-padded id. Unlike weights, it will not mutate and can therefore be
    # used to determine the actual sequence length.
    features.src.ids_indicator = 1 - src_paddings
    features.tgt = py_utils.NestedMap()
    features.tgt.ids = tgt_ids
    features.tgt.labels = tgt_labels
    features.tgt.ids_indicator = 1 - tgt_paddings

    src_task_id, tgt_task_id = self._GetTaskIds(source_id)
    # task_ids are padded with zeros.
    features.src.task_ids = tf.cast(
        features.src.ids_indicator, dtype=tf.int32) * src_task_id
    features.tgt.task_ids = tf.cast(
        features.tgt.ids_indicator, dtype=tf.int32) * tgt_task_id

    if not py_utils.use_tpu():
      features.src.strs = src
      features.tgt.strs = tgt
    return features.Transform(tf.squeeze)
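ApplyPadding above zeroes label ids wherever the padding indicator is 1; it behaves roughly like the plain-TF sketch below (not the library implementation):

import tensorflow as tf

labels = tf.constant([[5, 9, 2, 2]])        # last two slots padded with EOS=2
paddings = tf.constant([[0., 0., 1., 1.]])
masked = tf.cast(tf.cast(labels, tf.float32) * (1. - paddings), tf.int32)
# -> [[5, 9, 0, 0]]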
Example #5
  def _InputBatch(self):
    ret = py_utils.NestedMap()

    ret.bucket_keys = self._bucket_keys

    ret.src = py_utils.NestedMap()
    ret.src.ids = tf.cast(self._src_ids, dtype=tf.int32)
    ret.src.paddings = self._src_paddings

    ret.tgt = py_utils.NestedMap()
    ret.tgt.ids = self._tgt_ids
    ret.tgt.labels = tf.cast(self._tgt_labels, dtype=tf.int32)
    ret.tgt.weights = self._tgt_weights
    ret.tgt.paddings = self._tgt_paddings

    if (self.params.fprop_dtype is None or
        self.params.dtype == self.params.fprop_dtype):
      return ret

    def _Cast(v):
      if not v.dtype.is_floating:
        return v
      return tf.cast(v, self.params.fprop_dtype)

    return ret.Transform(_Cast)
Example #6
  def Mask(self, seq_ids, weights, actual_seq_len):
    p = self.params
    (src_ids, tgt_ids, tgt_labels, tgt_weights) = ops.mass(
        seq_ids,
        weights,
        actual_seq_len,
        mask_id=p.mask_id,
        mask_ratio=p.mask_ratio,
        mask_minlen=p.mask_minlen,
        span_len=p.span_len,
        random_start_prob=p.random_start_prob,
        keep_prob=p.keep_prob,
        rand_prob=p.rand_prob,
        mask_prob=p.mask_prob,
        mask_target=p.mask_target,
        vocab_size=p.vocab_size,
        first_unreserved_id=p.first_unreserved_id)

    mass_out = py_utils.NestedMap()
    mass_out.src = py_utils.NestedMap()
    mass_out.src.ids = src_ids
    mass_out.tgt = py_utils.NestedMap()
    mass_out.tgt.ids = tgt_ids
    mass_out.tgt.labels = tgt_labels
    mass_out.tgt.weights = tgt_weights
    return mass_out
Example #7
    def FProp(self, theta, prepared_inputs, step_inputs, padding, state0):
        """Performs one inference step on the RNN cell.

    If external_inputs is not None, it is added as another act input
    to the RNNCell.

    Args:
      theta: Variables used by the RNNCell.
      prepared_inputs: If not None, appended to step_inputs.inputs as another
        act input to the RNNCell. A tensor of shape
        [batch_size, external_input_dim].
      step_inputs: A NestedMap containing an 'inputs' list of [batch_size, dim]
        tensors, where the sum of dims (including external inputs) is
        p.cell.num_input_nodes.
      padding: A 0/1 float tensor of shape [batch_size]; 1.0 means that this
        batch element is empty in this step.
      state0: A NestedMap of state, either produced by ZeroState or a previous
        invocation of FProp.

    Returns:
      (output, state1), where output is the cell output (GetOutput(state1))
      of shape [batch_size, p.cell.num_output_nodes], and state1 is the cell's
      recurrent state.
    """
        cell_inputs = py_utils.NestedMap(act=step_inputs.inputs)
        # An empty NestedMap can act as a None value here.
        if prepared_inputs is not None and not isinstance(
                prepared_inputs, py_utils.NestedMap):
            cell_inputs.act.append(prepared_inputs)
        cell_inputs.padding = padding
        state1, extra = self.cell.FProp(theta.cell, state0, cell_inputs)
        return py_utils.NestedMap(output=self.cell.GetOutput(state1),
                                  extra=extra,
                                  padding=padding), state1
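On the 0/1 padding convention used by these steps: a hedged sketch of how a wrapper typically freezes state for padded batch elements (Lingvo's recurrent layers handle this internally):

import tensorflow as tf

padding = tf.constant([[0.], [1.]])   # [batch=2, 1]; second element is empty
state0 = tf.zeros([2, 4])
candidate = tf.ones([2, 4])
# Keep the old state where padding == 1.0, take the new state elsewhere.
state1 = candidate * (1. - padding) + state0 * padding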
Example #8
    def PrepareExternalInputs(self, theta, external_inputs):
        """Prepares external inputs for each sub-step.

    The external_inputs parameter of this method is processed by the
    external_inputs of each sub-step, then processed by the sub-step's
    PrepareExternalInputs method.

    Args:
      theta: variables used by sub-steps.
      external_inputs: A NestedMap of [n_batch, ...] tensors.

    Returns:
      A NestedMap of prepared inputs, where the keys are the names of
        each sub-step.
    """
        graph_tensors = builder_layers.GraphTensors()
        graph_tensors.StoreTensor('external_inputs', external_inputs)
        prepared_inputs = py_utils.NestedMap()
        with tf.name_scope(self.params.name):
            for seq in self._seq:
                if seq.external_signature:
                    template = py_utils.NestedMap(
                        inputs=seq.external_signature.inputs)
                    packed = template.Transform(graph_tensors.GetTensor)
                    seq_external_inputs = packed.inputs[0]
                    prepared_inputs[seq.name] = seq.step.PrepareExternalInputs(
                        theta[seq.name], seq_external_inputs)
                else:
                    prepared_inputs[seq.name] = py_utils.NestedMap()
        return prepared_inputs
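A minimal sketch of the GraphTensors store/fetch cycle the method above relies on (the 'ctx' field is hypothetical):

import tensorflow as tf
from lingvo.core import builder_layers
from lingvo.core import py_utils

gt = builder_layers.GraphTensors()
gt.StoreTensor('external_inputs', py_utils.NestedMap(ctx=tf.zeros([2, 8])))
# GetTensor walks the dotted path through the stored NestedMap.
ctx = gt.GetTensor('external_inputs.ctx')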
Example #9
    def FProp(self, theta, prepared_inputs, step_inputs, padding, state0):
        """Performs inference on N steps at once and concatenates the result.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      prepared_inputs: An output from PrepareExternalInputs.
      step_inputs: A `.NestedMap` containing a list called 'inputs'.
      padding: A 0/1 float tensor of shape [batch_size]; 1.0 means that this
        batch element is empty in this step.
      state0: The previous recurrent state.

    Returns:
      A tuple (output, state1):

      - output: A `.NestedMap` containing the output of the top-most step.
      - state1: The recurrent state to feed to next invocation of this graph.
    """
        state1 = py_utils.NestedMap(sub=[None] * len(self.sub))
        outputs = [None] * len(self.sub)

        for i in range(len(self.sub)):
            outputs[i], state1.sub[i] = self.sub[i].FProp(
                theta.sub[i], prepared_inputs.sub[i], step_inputs, padding,
                state0.sub[i])

        output = py_utils.NestedMap(output=tf.concat(outputs, axis=1))
        return output, state1
Example #10
    def _ConsumeMap(self):
        """Return the NestedMap that starts at the current position, and increment.

    Returns:
      The NestedMap that starts at the current token position.
    """
        if self._i >= len(self._tokens):
            raise ValueError(
                'Ran out of tokens while looking for a NestedMap.')
        if self._tokens[self._i] != '(':
            raise ValueError('Expected ( at token position %d' % (self._i))
        self._i += 1
        if self._MaybeConsumeSymbol(')'):
            # Empty NestedMaps are allowed.
            return py_utils.NestedMap()
        result = py_utils.NestedMap()
        while self._i < len(self._tokens):
            name = self._ConsumeKey()
            self._ConsumeSymbol('=')
            result[name] = self._ConsumeItem()
            if self._MaybeConsumeSymbol(')'):
                return result
            self._ConsumeSymbol(',')
        raise ValueError(
            'Ran out of tokens while looking for end of NestedMap.')
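For reference, hypothetical strings in the format this parser consumes: a parenthesized, comma-separated list of key=value pairs, possibly nested or empty:

examples = ['()', '(ids=x)', '(src=(ids=x,paddings=y),tgt=(ids=z))']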
Example #11
    def __init__(self, params):
        """Layer constructor.

    Subclasses of BaseLayer should decorate their __init__ with
    @base_layer.initializer.

    Args:
      params: A Params object used to construct this layer.
    """
        assert params.name, ('Layer params for %s must have a "name"' %
                             self.__class__.__name__)

        tf_module_name = params.name
        tf_module_name = re.sub('[^a-zA-Z0-9_]+', '_', tf_module_name)
        tf_module_name = 'bbf_' + self.__class__.__name__ + '_' + tf_module_name
        py_utils.NestedMap.CheckKey(tf_module_name)

        # initialize the base class.
        super(BaseLayer, self).__init__(tf_module_name)

        # Note: AutoTracking doesn't work properly due to its inability to walk
        # through py_utils.NestedMap data structures, which are used widely
        # throughout the Lingvo codebase. There also seems to be a performance
        # hit when auto-tracking is enabled during graph construction. For now,
        # we disable auto-tracking.
        # TODO(lingvo): Re-enable auto-tracking when fuller support is added
        # for the key data structures used in Lingvo and the performance issue
        # is debugged and understood better.
        self._setattr_tracking = False

        self._parent = (_LAYER_STACK.layer_stack[-2]
                        if len(_LAYER_STACK.layer_stack) > 1 else None)
        assert self._parent is not self
        self._params = params.Copy()
        tf.logging.debug('Creating layer %s with params: \n %s \n',
                         self.__class__.__name__, str(params))
        # Vars created by this layer.
        self._private_vars = py_utils.NestedMap()
        # Theta derived from this layer's vars.
        self._private_theta = py_utils.NestedMap()
        # Child layers created by this layer through CreateChild/CreateChildren.
        self._private_children = py_utils.NestedMap()
        # Child layers created by this layer. A well-formed layer should have
        # self._private_children equal to self._children_list, i.e., all child
        # layers are created using CreateChild/CreateChildren.
        self._children_list = []
        # Extra thetas that do not directly correspond to any underlying vars.
        # For example, the concatenated sharded variables.
        self._extra_theta = py_utils.NestedMap()
        # All registered accumulators.
        self._private_accumulators = py_utils.NestedMap()
        # Layer-private functions. Add with AddFunction.
        self._private_fns = dict()
        # Mapping from variable names to their symbolic shapes.
        # self._var_symbolic_shape_map['var_name'] will be a tuple of integers
        # or symbolic expressions, one for each dimension of the variable.
        self._var_symbolic_shape_map = dict()

        self.AddExtraTheta('global_step', py_utils.GetGlobalStep())
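The module-name sanitization at the top of the constructor, worked in isolation (the layer and param names are made up):

import re

name = 'enc/rnn-0'
tf_module_name = 'bbf_MyLayer_' + re.sub('[^a-zA-Z0-9_]+', '_', name)
# -> 'bbf_MyLayer_enc_rnn_0'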
Example #12
 def RnnStep(recurrent_theta, recurrent_state0, recurrent_inputs):
     """Compute a single timestep."""
     output, state1 = self.step.FProp(
         theta=recurrent_theta.theta,
         prepared_inputs=recurrent_theta.prepared_inputs,
         step_inputs=recurrent_inputs.inputs,
         padding=recurrent_inputs.padding,
         state0=recurrent_state0.state)
     recurrent_state1 = py_utils.NestedMap(output=output, state=state1)
     return recurrent_state1, py_utils.NestedMap()
Example #13
    def ComputePredictions(self, theta, batch):
        # pyformat: disable
        """Compute the model predictions.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      batch: A `.NestedMap`.

        - src: A `.NestedMap`.
          - ids: The source ids, ends in <eos>.
          - paddings: The source paddings.

        - tgt: A `.NestedMap`.
          - ids: The target ids, ends in <eos>.
          - paddings: The target paddings.

    Returns:
      A `.NestedMap`.
        - outputs: The contextualized output vectors of shape
          [batch_size, time_dim, model_dim].
        - tgt: A `.NestedMap` (optional, only during training).
          - ids: The canvas ids.
          - paddings: The canvas paddings.
          - target_indices: The target indices.
          - target_weights: The target weights.
    """
        # pyformat: enable
        p = self.params

        # TODO(williamchan): Currently, we only support KERMIT mode (i.e., no
        # encoder, unified architecture).
        assert not p.encoder

        # Sometimes src and tgt have different types. We reconcile here and use
        # int32.
        batch.src.ids = tf.cast(batch.src.ids, tf.int32)
        batch.tgt.ids = tf.cast(batch.tgt.ids, tf.int32)

        canvas_and_targets = self._CreateCanvasAndTargets(batch)
        batch = py_utils.NestedMap(tgt=py_utils.NestedMap(
            ids=canvas_and_targets.canvas,
            paddings=canvas_and_targets.canvas_paddings))

        predictions = super(InsertionModel,
                            self).ComputePredictions(theta, batch)

        if not self.do_eval:
            predictions.tgt = py_utils.NestedMap(
                ids=canvas_and_targets.canvas,
                paddings=canvas_and_targets.canvas_paddings,
                target_indices=canvas_and_targets.target_indices,
                target_weights=canvas_and_targets.target_weights)

        return predictions
Example #14
    def FProp(self, theta, prepared_inputs, step_inputs, padding, state0):
        """Performs inference on the stack of sub-steps.

    There are three possible ways to feed input to the stack:

      * step_inputs.inputs: These tensors are fed only to the lowest layer.
      * step_inputs.context: [Optional] This tensor is fed to every layer.
      * prepared_inputs: [Optional] This tensor is fed to every layer and
          is assumed to stay constant over all steps.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      prepared_inputs: An output from PrepareExternalInputs.
      step_inputs: A `.NestedMap` containing a list called 'inputs' and,
        optionally, a tensor called 'context'.
      padding: A 0/1 float tensor of shape [batch_size]; 1.0 means that this
        batch element is empty in this step.
      state0: The previous recurrent state.

    Returns:
      A tuple (output, state1):

      - output: A `.NestedMap` containing the output of the top-most step.
      - state1: The recurrent state to feed to next invocation of this graph.
    """
        state1 = py_utils.NestedMap(sub=[])
        inputs = list(step_inputs.inputs)
        # We pretend that the input is the output of layer -1 for the purposes
        # of residual connections.
        residual_inputs = [tf.concat(inputs, axis=1)]
        additional = []
        if 'context' in step_inputs:
            additional.append(step_inputs.context)
        for i in range(len(self.sub)):
            sub_inputs = py_utils.NestedMap(inputs=inputs + additional)
            sub_output, state1_i = self.sub[i].FProp(theta.sub[i],
                                                     prepared_inputs.sub[i],
                                                     sub_inputs, padding,
                                                     state0.sub[i])
            state1.sub.append(state1_i)
            output = sub_output.output
            if i >= self.params.residual_start >= 0:
                # residual_inputs contains the step input at residual_inputs[0].
                assert i + 1 - self.params.residual_stride < len(
                    residual_inputs)
                output += residual_inputs[i + 1 - self.params.residual_stride]
            residual_inputs.append(output)
            inputs = [output]
        return py_utils.NestedMap(output=output), state1
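Worked indexing for the residual logic above, with hypothetical settings residual_start=1 and residual_stride=2; residual_inputs[0] holds the step input ("layer -1"):

residual_stride = 2
residual_start = 1
for i in range(4):
    if i >= residual_start:
        src = i + 1 - residual_stride
        print('layer %d adds residual_inputs[%d]' % (i, src))
# layer 1 adds residual_inputs[0] (the step input)
# layer 2 adds residual_inputs[1] (output of layer 0)
# layer 3 adds residual_inputs[2] (output of layer 1)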
Example #15
        def _CellFn(unused_theta, unused_state0, inputs):
            """Recurrent cell function wrapper of body.FProp."""
            # Sets shapes for both theta and inputs to self.body.FProp.
            for dst, src in zip(inputs.args + inputs.theta.Flatten(),
                                list(args) + theta_stack.Flatten()):
                if src is not None:
                    dst.set_shape(tf.TensorShape(src.shape.as_list()[1:]))

            # Runs the actual body.FProp
            fprop_outputs = self.body.FProp(inputs.theta, *inputs.args)
            fprop_outputs = _ToTuple(fprop_outputs)
            assert len(fprop_outputs) == len(out_shapes)
            # Passes fprop outputs to the next layer through state.
            state1 = py_utils.NestedMap(outputs=list(fprop_outputs))
            return state1, py_utils.NestedMap()
Example #16
    def FProp(self, theta, input_batch, state0=None):
        p = self.params
        src_segment_id = None
        with tf.name_scope(p.name):
            # Reshape to [t, b]
            inputs = py_utils.with_dependencies([
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            [-1, -1]),
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            tf.shape(input_batch.paddings))
            ], tf.transpose(input_batch.ids))
            paddings = tf.expand_dims(tf.transpose(input_batch.paddings), 2)

            # Setup streaming states.
            if not state0:
                state0 = self.zero_state(theta, tf.shape(inputs)[1])
            state1 = py_utils.NestedMap(rnn=[None] * p.num_lstm_layers)

            xs = self.emb.EmbLookup(theta.emb, inputs)
            xs = self.ApplyClipping(theta, xs)
            summary_utils.histogram('input_emb', xs)
            xs = self.dropout.FProp(theta.dropout, xs)
            ps = paddings
            # Now the rnn layers.
            outputs_list = []
            for i in range(0, p.num_lstm_layers):
                layer = self.rnn[i]
                ys, state1.rnn[i] = layer.FProp(theta.rnn[i],
                                                xs,
                                                ps,
                                                state0=state0.rnn[i])
                ys = self.dropout.FProp(theta.dropout, ys)
                if i >= p.residual_start:
                    xs += ys  # Residual skip
                    xs = self.ApplyClipping(theta, xs)
                else:
                    xs = ys
                outputs_list.append(xs)
                summary_utils.histogram('layer_out_%s' % i, xs)

            if p.is_transparent:
                xs = self.transparent_merger.FProp(theta.transparent_merger,
                                                   outputs_list)

            return py_utils.NestedMap(encoded=xs,
                                      padding=tf.squeeze(ps, [2]),
                                      segment_id=src_segment_id,
                                      state=state1)
Example #17
    def _InputBatch(self):
        p = self.params

        @tf.function
        def ReadData():
            x, y = io_ops.restore_v2(p.ckpt, [p.data, p.label], [''] * 2,
                                     [p.data_dtype, p.label_dtype])
            # Always convert to float32.
            return tf.cast(x, tf.float32), tf.cast(y, tf.float32)

        # Loads data and label into memory and keeps them around.
        data, label = ops.cached_call(f=ReadData.get_concrete_function(),
                                      T=[tf.float32, tf.float32])
        b, shape = self.InfeedBatchSize(), list(p.data_shape)
        data = tf.reshape(data, [-1] + shape)
        label = tf.reshape(label, [-1])
        label = py_utils.HasShape(label, [tf.shape(data)[0]])
        sample_ids = ops.random_permutation_sequence(
            num=p.num_samples,
            batch=b,
            repeat=p.repeat,
            seed=p.random_seed if p.random_seed else 0)
        n = tf.shape(sample_ids)[0]
        raw = py_utils.PadOrTrimTo(tf.gather(data, sample_ids), [b] + shape)
        ret = py_utils.NestedMap(
            raw=raw,
            data=self._Preprocess(raw),
            label=py_utils.PadOrTrimTo(tf.gather(label, sample_ids), [b]),
            weight=py_utils.PadOrTrimTo(tf.ones([n], dtype=tf.float32), [b]))
        if not py_utils.use_tpu():
            ret['sample_ids'] = sample_ids
        return ret
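PadOrTrimTo above forces every field to the static batch size b; for a 1-D input it behaves roughly like this sketch (not the library code):

import tensorflow as tf

padded = tf.pad(tf.range(5), [[0, 3]])  # pads with zeros up to length 8
trimmed = tf.range(12)[:8]              # truncates when the input is too long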
Example #18
    def ZeroState(self, theta, prepared_inputs, batch_size):
        """Returns the initial state given external inputs and batch size.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      prepared_inputs: External inputs returned by PrepareExternalInputs().
      batch_size: An int scalar representing the batch size of per-step inputs.

    Returns:
      A `.NestedMap` representing the initial state, which can be passed to
      FProp() for processing the first time step.
    """
        state0 = py_utils.NestedMap()
        for name, child in six.iteritems(self.children):
            if isinstance(child, (tuple, list)):
                output = []
                for i, sub in enumerate(child):
                    if isinstance(sub, Step):
                        output.append(
                            sub.ZeroState(theta[name][i],
                                          prepared_inputs[name][i],
                                          batch_size))
                if output:
                    if len(output) != len(child):
                        raise ValueError(
                            'Expected all children in the list to be '
                            'instances of Step.')
                    state0[name] = type(child)(output)
            elif isinstance(child, Step):
                state0[name] = child.ZeroState(theta[name],
                                               prepared_inputs[name],
                                               batch_size)
        return state0
Example #19
    def BuildDataSource(self, data_source_from_file_pattern_fn):
        """Builds a Chaining Data Source.

    Args:
      data_source_from_file_pattern_fn: a function that takes file_pattern as an
        argument and returns an input batch.

    Returns:
      A NestedMap containing `data`, which is a tuple of tf.Tensor or
      `.NestedMap` of tf.Tensor.

    Raises:
      ValueError: If unknown token type.
    """
        p = self.params
        if not isinstance(p.file_patterns, list):
            raise ValueError('Expected a list, got %s' % (p.file_patterns, ))
        if not all(isinstance(x, six.string_types) for x in p.file_patterns):
            # Chaining doesn't work with weights or backprop filters, i.e. when
            # file_pattern param contains a list of
            # <file_pattern, weight, [bprop_variable_filter]> tuples.
            raise ValueError('Expected a list of strings, got %s' %
                             (p.file_patterns, ))

        for file_pattern in p.file_patterns:
            if ',' in file_pattern:
                raise ValueError(
                    'Can not use commas in file_pattern when chaining '
                    'is used. file_pattern: %s' % (file_pattern, ))
        ret = py_utils.NestedMap()
        ret.data = data_source_from_file_pattern_fn(','.join(p.file_patterns))
        ret.bprop_variable_filters = [''] * len(p.file_patterns)
        return ret
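What the chaining above hands to the underlying reader, with hypothetical patterns:

file_patterns = ['/data/train-a*', '/data/train-b*']
combined = ','.join(file_patterns)  # '/data/train-a*,/data/train-b*'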
Example #20
    def BuildDataSource(self, data_source_from_file_pattern_fn, task_id=None):
        """Builds a simple, unweighted Data Source.

    Args:
      data_source_from_file_pattern_fn: a function that takes file_pattern as an
        argument and returns an input batch.
      task_id: an optional task id, passed through to
        data_source_from_file_pattern_fn.

    Returns:
      A NestedMap containing `data`, which is a tuple of tf.Tensor or
      `.NestedMap` of tf.Tensor.
    """
        p = self.params
        if not isinstance(p.file_pattern, six.string_types):
            raise ValueError(
                'SimpleDataSource expects p.file_pattern to be a string.'
                ' To use multiple files use a comma separated string, '
                'e.g. \', \'.join(list_of_file_patterns)')

        if p.file_type:
            file_pattern = '{}:{}'.format(p.file_type, p.file_pattern)
        else:
            file_pattern = p.file_pattern

        ret = py_utils.NestedMap()
        ret.data = data_source_from_file_pattern_fn(file_pattern,
                                                    task_id=task_id)
        ret.bprop_variable_filters = ['']
        return ret
Example #21
    def FProp(self, theta, inputs, *extra_inputs):
        """Forward pass.

    Args:
      theta: A NestedMap object containing weights' values of this layer and its
        children layers.
      inputs: A NestedMap: .split1 and .split2 corresponding to x1 and x2.
      *extra_inputs: additional inputs that will be passed to both f and g. No
        gradient will be computed for these inputs.

    Returns:
      outputs: A NestedMap: .split1 and .split2 corresponding to y1 and y2.
      f_seed: Scalar tensor. The step seed used in forward for the f block.
      g_seed: Scalar tensor. The step seed used in forward for the g block.

    """

        f_seed = py_utils.GetStepSeed()
        f_out = self.f_block.FProp(theta.f_block, inputs.split2, *extra_inputs)
        z1 = inputs.split1 + f_out
        g_seed = py_utils.GetStepSeed()
        g_out = self.g_block.FProp(theta.g_block, z1, *extra_inputs)
        y2 = inputs.split2 + g_out
        # This is essential to make dy1 independent of y2.
        y1 = tf.identity(z1)
        return py_utils.NestedMap(split1=y1, split2=y2), f_seed, g_seed
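The reversible-block arithmetic above in isolation: the outputs alone suffice to reconstruct the inputs, which is what ReverseAndGrad in the next example exploits to avoid storing activations. A minimal numeric sketch (f and g are arbitrary stand-ins):

def rev_forward(x1, x2, f, g):
    z1 = x1 + f(x2)
    y2 = x2 + g(z1)
    return z1, y2          # y1 is z1

def rev_inverse(y1, y2, f, g):
    x2 = y2 - g(y1)
    x1 = y1 - f(x2)
    return x1, x2

f = lambda v: 3 * v
g = lambda v: v * v
assert rev_inverse(*rev_forward(2.0, 5.0, f, g), f, g) == (2.0, 5.0)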
Example #22
        def Bak(inputs, outputs, d_outputs):
            """Backward step."""
            del inputs  # unused
            output_acts, step_seeds = outputs
            d_outputs = d_outputs[0]

            d_layer_thetas = []
            for layer_idx in reversed(range(num_layers)):
                f_seed, g_seed = step_seeds[layer_idx]
                layer = self.sub_layers[layer_idx]
                layer_theta = theta.sub_layers[layer_idx]

                input_acts, d_inputs, d_theta = layer.ReverseAndGrad(
                    layer_theta, output_acts, d_outputs, f_seed, g_seed,
                    *extra_inputs)

                d_layer_thetas.append(d_theta)
                # Passes reconstructed inputs to the previous layer.
                output_acts = input_acts
                d_outputs = d_inputs
            py_utils.ResetStepSeed(final_step_seed)
            d_theta = py_utils.NestedMap(
                global_step=tf.zeros_like(initial_step_seed))
            d_theta.sub_layers = list(reversed(d_layer_thetas))

            extra_grads = [tf.zeros_like(t) for t in extra_inputs]
            return [
                tf.zeros_like(initial_step_seed), d_theta, d_inputs,
                extra_grads
            ]
Example #23
    def StoreTensor(self, path, tensor):
        """Add tensor 't' to 'named_tensors' at 'path'.

    A path may be a name or a path into a NestedMap. For instance,
    StoreTensor('a.b.c', [1]), is equivalent to this:
    {'a', {'b': {'c': [1]}}.

    NestedMaps will be created if they do not already exist, or modified if they
    do exist. However, tensors cannot be overwritten.

    Args:
      path: A path into a NestedMap.
      tensor: The item to store (may be a NestedMap or a tensor).
    """
        names = path.strip().split('.')
        named_tensors = self._named_tensors
        while len(names) > 1:
            n = names.pop(0)
            assert isinstance(named_tensors, py_utils.NestedMap), named_tensors
            if n not in named_tensors:
                named_tensors[n] = py_utils.NestedMap()
            named_tensors = named_tensors[n]
        n = names.pop(0)
        if n in named_tensors:
            raise ValueError('A tensor named "%s" (%s) already exists.' %
                             (n, path))
        named_tensors[n] = tensor
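The same path logic with plain dicts, as a standalone sketch:

def store(named, path, value):
    names = path.strip().split('.')
    for n in names[:-1]:
        named = named.setdefault(n, {})
    assert names[-1] not in named, 'tensors cannot be overwritten'
    named[names[-1]] = value

d = {}
store(d, 'a.b.c', [1])
assert d == {'a': {'b': {'c': [1]}}}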
Example #24
 def FPropMeta(cls, p, *args):
     py_utils.CheckShapes(args)
     if p.n > 1:
         out_shapes = args[:p.n]
     else:
         out_shapes = (args[0], )
     return py_utils.NestedMap(flops=0, out_shapes=out_shapes)
Example #25
    def FProp(self, theta, prepared_inputs, inputs, padding, state0, **kwargs):
        """Runs a Step layer over multiple timesteps using Recurrent.

    Args:
      theta: A NestedMap containing weights' values of this layer and its
        children layers.
      prepared_inputs: External inputs returned by Step.PrepareExternalInputs().
      inputs: A NestedMap of inputs of shape [time, batch_size, dim].
      padding: A 0/1 float tensor of shape [time, batch_size]; 1.0 means that
        this batch element is empty in this step.
      state0: A NestedMap containing the initial recurrent state.
      **kwargs: Additional kwargs to pass to Recurrent.

    Returns:
      A tuple (outputs, state1).

      - outputs: A NestedMap containing the accumulated outputs of all steps,
        containing Tensors shaped [time, batch_size, dim].
      - state1: A NestedMap containing the accumulated recurrent states,
        containing Tensors shaped [time, batch_size, dim].
    """
        def RnnStep(recurrent_theta, recurrent_state0, recurrent_inputs):
            """Compute a single timestep."""
            output, state1 = self.step.FProp(
                theta=recurrent_theta.theta,
                prepared_inputs=recurrent_theta.prepared_inputs,
                step_inputs=recurrent_inputs.inputs,
                padding=recurrent_inputs.padding,
                state0=recurrent_state0.state)
            recurrent_state1 = py_utils.NestedMap(output=output, state=state1)
            return recurrent_state1, py_utils.NestedMap()

        # In order to pass Step outputs through Recurrent, they need to be
        # included as part of state.
        output0, _ = self.step.FProp(theta.step, prepared_inputs,
                                     inputs.Transform(lambda x: x[0]),
                                     padding[0], state0)

        accumulated_states, _ = recurrent.Recurrent(
            theta=py_utils.NestedMap(theta=theta.step,
                                     prepared_inputs=prepared_inputs),
            state0=py_utils.NestedMap(output=output0, state=state0),
            inputs=py_utils.NestedMap(inputs=inputs, padding=padding),
            cell_fn=RnnStep,
            **kwargs)

        return accumulated_states.output, accumulated_states.state
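A hedged, minimal analogue of what Recurrent does with RnnStep above: scan the cell function over the leading (time) axis, accumulating every state it returns (the real implementation also provides custom gradients):

def scan(cell_fn, theta, state0, inputs_per_step):
    accumulated, state = [], state0
    for inp in inputs_per_step:
        state, _ = cell_fn(theta, state, inp)
        accumulated.append(state)
    return accumulated, state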
Example #26
    def FProp(self, theta, *args):
        p = self.params
        # Collects all variable key and values into sets.
        theta_stack = _MaybeStackExtraTheta(theta.body, self.body.vars,
                                            p.repeat)

        def _ArgsToState(arg_list):
            """Returns a NestedMap from a list of FProp args."""
            state = py_utils.NestedMap()
            # Maintains a mapping from arg_idx to tensor. The state cannot
            # contain None tensors.
            for idx in range(len(args)):
                if arg_list[idx] is not None:
                    state['_s{}'.format(idx)] = arg_list[idx]
            return state

        def _StateToArgs(state):
            """Returns a list of FProp args from a NestedMap."""
            arg_list = []
            for idx in range(len(args)):
                attr = '_s{}'.format(idx)
                arg_list.append(state[attr] if attr in state else None)
                if arg_list[-1] is not None:
                    arg_list[-1].set_shape(args[idx].shape)
            return arg_list

        def _CellFn(unused_theta, state0, theta_i):
            """Recurrent cell function wrapper of body.FProp."""
            # Retrieves fprop arguments from state and sets shapes.
            fprop_inputs = _StateToArgs(state0)

            # Sets shapes for theta_i as well.
            for dst, src in zip(theta_i.Flatten(), theta_stack.Flatten()):
                if src is not None:
                    dst.set_shape(tf.TensorShape(src.shape.as_list()[1:]))

            # Runs the actual body.FProp.
            fprop_outputs = self.body.FProp(theta_i, *fprop_inputs)
            fprop_outputs = _ToTuple(fprop_outputs)
            assert len(fprop_outputs) == len(fprop_inputs)

            # Passes fprop outputs to the next layer through state.
            state1 = _ArgsToState(fprop_outputs)
            return state1, py_utils.NestedMap()

        with tf.name_scope(p.name):
            # Add FProp arg list to state0.
            state0 = _ArgsToState(args)
            # Runs body.FProp k times using Recurrent, where k = dim 0 of
            # theta_stack.
            _, state1 = recurrent.Recurrent(
                theta=py_utils.NestedMap(),
                state0=state0,
                inputs=theta_stack,  # Pass cell_fn theta through inputs.
                cell_fn=_CellFn)

            # Retrieves fprop outputs from state1 and sets shapes.
            output_tensors = _StateToArgs(state1)
            return output_tensors[0] if len(args) == 1 else tuple(
                output_tensors)
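The _ArgsToState/_StateToArgs helpers above round-trip positional args through keys '_s0', '_s1', ..., skipping None entries. A plain-Python check of that invariant:

def args_to_state(arg_list):
    return {'_s%d' % i: a for i, a in enumerate(arg_list) if a is not None}

def state_to_args(state, n):
    return [state.get('_s%d' % i) for i in range(n)]

assert state_to_args(args_to_state(['a', None, 'b']), 3) == ['a', None, 'b']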
Example #27
 def FPropMeta(cls, p, inputs, paddings):
     py_utils.CheckShapes((inputs, paddings))
     b, t, f, _ = inputs
     assert f == 1
     oc = p.filter_shape[2] * p.filter_shape[3] * p.weight_tiling_factor
     outputs = tshape.Shape([b, t, f, oc])
     flops = b * t * f * p.filter_shape[0] * oc * 5
     return py_utils.NestedMap(flops=flops, out_shapes=(outputs, paddings))
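A worked instance of the flops formula above, with hypothetical sizes:

b, t, f = 8, 100, 1       # batch, time, frequency
filter_time, oc = 3, 64   # p.filter_shape[0] and output channels
flops = b * t * f * filter_time * oc * 5
assert flops == 768000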
Example #28
    def FProp(self, theta, prepared_inputs, step_inputs, padding, state0):
        """A single inference step for this step graph.

    Args:
      theta: variables used by sub-steps.
      prepared_inputs: A NestedMap containing external_inputs that were
        pre-processed by the PrepareExternalInputs method of each sub-step. The
        keys are the names of the sub-steps.
      step_inputs: A NestedMap of [batch, ...] tensors. The structure of this
        depends on the graph implementation.
      padding: A 0/1 float tensor of shape [batch_size]; 1.0 means that this
        batch element is empty in this step.
      state0: A NestedMap of state variables produced by either ZeroState or a
        previous invocation of this FProp step. The keys are the names of the
        sub-steps.

    Returns:
      (output, state1), both of which are NestedMaps.
      output is implementation-dependent and is defined by the output_signature
      parameter.
      state1 is a NestedMap where the keys are names of sub-steps and the values
      are state outputs from their FProp methods.
    """
        p = self.params
        graph_tensors = builder_layers.GraphTensors()
        graph_tensors.StoreTensor('prepared_inputs', prepared_inputs)
        graph_tensors.StoreTensor('step_inputs', step_inputs)
        state1 = py_utils.NestedMap()
        with tf.name_scope(p.name):
            for seq in self._seq:
                tf.logging.vlog(1, 'GraphStep: call %s', seq.name)
                external = None
                if seq.external_signature:
                    external = prepared_inputs[seq.name]
                template = py_utils.NestedMap(inputs=seq.signature.inputs)
                packed = template.Transform(graph_tensors.GetTensor)
                input_args = packed.inputs[0]
                out, seq_state1 = seq.step.FProp(theta[seq.name], external,
                                                 input_args, padding,
                                                 state0[seq.name])
                graph_tensors.StoreTensor(seq.signature.outputs[0], out)
                state1[seq.name] = seq_state1
        template = py_utils.NestedMap(inputs=self.output_signature.inputs)
        output_tensors = template.Transform(graph_tensors.GetTensor).inputs[0]
        return output_tensors, state1
Example #29
    def FProp(self, theta, input_batch):
        """Encodes source as represented by `inputs` and `paddings`.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      input_batch: A `.NestedMap` with fields:
        - ids: The inputs tensor. It is expected to be of shape [batch, time].
        - paddings: The paddings tensor. Expected shape [batch, time].

    Returns:
      A NestedMap containing:

      - encoded: The encoded features, a tensor of shape [time, batch, depth]
      - padding: of shape [time, batch]
      - segment_id: [time, batch] if packed inputs are supported by the model
        (and all layers), or None otherwise.
    """
        p = self.params
        src_segment_id = None
        with tf.name_scope(p.name):
            # Now the rnn layers.
            inputs = py_utils.with_dependencies([
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            [-1, -1]),
                py_utils.assert_shape_match(tf.shape(input_batch.ids),
                                            tf.shape(input_batch.paddings))
            ], tf.transpose(input_batch.ids))
            paddings = tf.expand_dims(tf.transpose(input_batch.paddings), 2)
            xs = self.emb.EmbLookup(theta.emb, inputs)
            xs = self.ApplyClipping(theta, xs)
            self._emb_out = xs
            ps = paddings
            # When cc_schedule is specified, make sure lstm_tpl is QuantizedLSTMCell
            # with the same cc_schedule so that the RNN layer output is within
            # clipping range.
            xs = self.rnn[0].FProp(theta.rnn[0], xs, ps)
            xs = self.dropout.FProp(theta.dropout, xs)
            for i in range(1, p.num_lstm_layers):
                layer = self.rnn[i]
                ys, _ = layer.FProp(theta.rnn[i], xs, ps)
                ys = self.dropout.FProp(theta.dropout, ys)
                if hasattr(layer.params, 'cell'):
                    layer_params = layer.params.cell
                else:
                    layer_params = layer.params
                if layer_params.num_input_nodes == layer_params.num_output_nodes:
                    xs += ys  # Residual skip
                    xs = self.ApplyClipping(theta, xs)
                else:
                    # When cc_schedule is specified, make sure lstm_tpl is
                    # QuantizedLSTMCell with the same cc_schedule so that the RNN layer
                    # output is within clipping range.
                    xs = ys
            return py_utils.NestedMap(encoded=xs,
                                      padding=tf.squeeze(ps, [2]),
                                      segment_id=src_segment_id)
Example #30
 def _InputBatch(self):
   length = tf.reduce_prod(self.shape)
   counter = summary_utils.StatsCounter('CountingInputGenerator')
   new_value = tf.cast(counter.IncBy(length), dtype=tf.int32) - length
   new_value = tf.stop_gradient(new_value)
   values = new_value + tf.range(length)
   shaped_values = tf.reshape(tf.cast(values, dtype=tf.float32), self.shape)
   targets = tf.reduce_sum(shaped_values, axis=0)
   return py_utils.NestedMap(src_ids=shaped_values, tgt_ids=targets)
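A worked trace of the generator above for shape [2, 3], using numpy as a stand-in (the counter starts at 0 on the first call):

import numpy as np

shape = (2, 3)
values = np.arange(np.prod(shape), dtype=np.float32)
src_ids = values.reshape(shape)   # [[0., 1., 2.], [3., 4., 5.]]
tgt_ids = src_ids.sum(axis=0)     # [3., 5., 7.]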