Code Example #1
File: pooling.py Project: GiveMeLuna/tensorflow
 def compute_output_shape(self, input_shape):
   input_shape = tensor_shape.TensorShape(input_shape).as_list()
   if self.data_format == 'channels_last':
     return tensor_shape.TensorShape([input_shape[0], input_shape[4]])
   else:
     return tensor_shape.TensorShape([input_shape[0], input_shape[1]])
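A note on the pattern shared by all the examples on this page: `tensor_shape.TensorShape` accepts lists, tuples, `None`, and existing shapes, and `as_list()` reports unknown dimensions as `None`. A minimal standalone sketch (using the same internal import as the snippets here; TF 2.x assumed):

from tensorflow.python.framework import tensor_shape

# Unknown dimensions come back as None from as_list().
shape = tensor_shape.TensorShape([None, 224, 224, 3])
print(shape.as_list())  # [None, 224, 224, 3]
print(shape.rank)       # 4

# TensorShape(None) is a shape whose rank itself is unknown.
print(tensor_shape.TensorShape(None).rank)  # None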
Code Example #2
def _SegmentReductionShape(op):
    """Common shape function for segment reduction ops."""
    data_shape = op.inputs[0].get_shape()
    segment_ids_shape = op.inputs[1].get_shape()
    segment_ids_shape.assert_has_rank(1)
    return [tensor_shape.TensorShape([None]).concatenate(data_shape[1:])]
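The `TensorShape([None]).concatenate(data_shape[1:])` idiom builds an output shape whose leading (segment) dimension is unknown while the trailing dimensions are carried over from the data. A small illustration of `concatenate`, which joins two shapes without requiring either to be fully defined:

from tensorflow.python.framework import tensor_shape

data_shape = tensor_shape.TensorShape([8, 16, 32])
# Replace the leading dimension with an unknown one.
out = tensor_shape.TensorShape([None]).concatenate(data_shape[1:])
print(out.as_list())  # [None, 16, 32]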
Code Example #3
def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
    """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      iterator: Iterator for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
    current_strategy = model._distribution_strategy
    K.get_session().run(current_strategy.initialize())

    # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
    K.set_learning_phase(0)

    def _per_device_predict_function(model):
        model._make_predict_function()
        return (model.predict_function.inputs, model.predict_function.outputs,
                model.predict_function.updates_op,
                model.predict_function.session_kwargs)

    def step_fn(ctx, *inputs):
        """Clones the model and calls make_predict_function."""

        # TODO(priyag, sourabhbajaj): The model gets cloned every time
        # fit/test/predict is called. We should look into caching this keyed on
        # input shapes.
        clone_model_on_towers(model,
                              current_strategy,
                              make_callback_model=False,
                              inputs=inputs,
                              mode=_Mode.PREDICT)

        (grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args) = current_strategy.call_for_each_tower(
             _per_device_predict_function, model._grouped_model_predict)

        (all_inputs, all_outputs, all_updates,
         all_session_args) = distributed_training_utils.unwrap_values(
             current_strategy, grouped_inputs, grouped_outputs,
             grouped_updates, grouped_session_args)

        combined_fn = K.Function(all_inputs,
                                 all_outputs,
                                 updates=all_updates,
                                 name='distributed_predict_function',
                                 **all_session_args)

        for label, output in zip(model.output_names, combined_fn.outputs):
            ctx.set_last_step_output(label, output)

        return combined_fn.updates_op

    # Add initial dummy values for outputs.
    initial_loop_values = {}
    batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
    for name, tensor in zip(model.output_names, model.outputs):
        # TODO(priyag): This is a workaround as we do not know the batch dimension
        # of the model's output at this point.
        shape = tensor_shape.TensorShape(tensor.shape.dims)
        shape.dims = [batch_dimension] + shape.dims[1:]
        initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

    with current_strategy.scope():
        # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
        ctx = current_strategy.run_steps_on_dataset(
            step_fn,
            iterator,
            iterations=1,
            initial_loop_values=initial_loop_values)

    predict_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    if verbose == 1:
        progbar = Progbar(target=steps)

    # Copy the weights from the original model to each of the replicated models.
    orig_model_weights = model.get_weights()
    with current_strategy.scope():
        distributed_model = current_strategy.unwrap(
            model._grouped_model_predict)[0]
        distributed_training_utils.set_weights(current_strategy,
                                               distributed_model,
                                               orig_model_weights)

    assert steps is not None
    # Since we do not know how many samples we will see, we cannot pre-allocate
    # the returned Numpy arrays. Instead, we store one array per batch seen
    # and concatenate them upon returning.
    unconcatenated_outs = [[] for _ in model.outputs]
    for step in range(steps):
        _, batch_outs = K.get_session().run([predict_op, output_tensors])
        # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
        for i, label in enumerate(model.output_names):
            unconcatenated_outs[i].extend(batch_outs[label])
        if verbose == 1:
            progbar.update(step + 1)

    K.get_session().run(current_strategy.finalize())

    if len(unconcatenated_outs) == 1:
        return np.concatenate(unconcatenated_outs[0], axis=0)
    return [
        np.concatenate(unconcatenated_outs[i], axis=0)
        for i in range(len(unconcatenated_outs))
    ]
Code Example #4
 def _batch(self, batch_size):
   return SparseTensorSpec(
       tensor_shape.TensorShape([batch_size]).concatenate(self._shape),
       self._dtype)
Code Example #5
 def _event_shape(self):
     return tensor_shape.TensorShape([])
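`TensorShape([])` here denotes a rank-0 (scalar) event shape, which is different from `TensorShape(None)`, where the rank itself is unknown (compare Code Example #18 below):

from tensorflow.python.framework import tensor_shape

print(tensor_shape.TensorShape([]).rank)    # 0 (scalar, fully defined)
print(tensor_shape.TensorShape(None).rank)  # None (unknown rank)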
Code Example #6
 def do_decode(self, value, decode_fn):
   del decode_fn
   return tensor_shape.TensorShape(value.tensor_shape_value)
Code Example #7
 def testRepr(self):
     spec = TwoTensorsSpec([5, 3], dtypes.int32, None, dtypes.bool)
     self.assertEqual(
         repr(spec), "TwoTensorsSpec(%r, %r, %r, %r, %r)" %
         (tensor_shape.TensorShape([5, 3]), dtypes.int32,
          tensor_shape.TensorShape(None), dtypes.bool, "red"))
Code Example #8
File: capsule.py Project: RNNCCL/capsules-tensorflow
 def _compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     output_shape = tensor_shape.TensorShape(
         [input_shape[0], self.units, self.dim])
     return output_shape
Code Example #9
 def test_fixed_shards_partitioner(self):
     partitioner = sharded_variable.FixedShardsPartitioner(num_shards=2)
     got = partitioner(tensor_shape.TensorShape([10, 3]), dtypes.float32)
     self.assertAllEqual(got, [2, 1])
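The `[2, 1]` asserted above is a list of shard counts per axis: two shards along the first dimension of the `[10, 3]` shape and one along the second, so each shard would hold a `[5, 3]` slice. The same call outside the test harness (a sketch; internal import paths as in the test):

from tensorflow.python.distribute import sharded_variable
from tensorflow.python.framework import dtypes, tensor_shape

partitioner = sharded_variable.FixedShardsPartitioner(num_shards=2)
# One entry per axis: shard counts, not shard sizes.
print(partitioner(tensor_shape.TensorShape([10, 3]), dtypes.float32))  # [2, 1]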
Code Example #10
  def __init__(self, dtype, size=None, dynamic_size=None,
               clear_after_read=None, tensor_array_name=None, handle=None,
               flow=None, infer_shape=True, element_shape=None, name=None):
    """Construct a new TensorArray or wrap an existing TensorArray handle.

    A note about the parameter `name`:

    The name of the `TensorArray` (even if passed in) is uniquified: each time
    a new `TensorArray` is created at runtime it is assigned its own name for
    the duration of the run.  This avoids name collisions if a `TensorArray`
    is created within a `while_loop`.

    Args:
      dtype: (required) data type of the TensorArray.
      size: (optional) int32 scalar `Tensor`: the size of the TensorArray.
        Required if handle is not provided.
      dynamic_size: (optional) Python bool: If true, writes to the TensorArray
        can grow the TensorArray past its initial size.  Default: False.
      clear_after_read: Boolean (optional, default: True).  If True, clear
        TensorArray values after reading them.  This disables read-many
        semantics, but allows early release of memory.
      tensor_array_name: (optional) Python string: the name of the TensorArray.
        This is used when creating the TensorArray handle.  If this value is
        set, handle should be None.
      handle: (optional) A `Tensor` handle to an existing TensorArray.  If this
        is set, tensor_array_name should be None.
      flow: (optional) A float `Tensor` scalar coming from an existing
        `TensorArray.flow`.
      infer_shape: (optional, default: True) If True, shape inference
        is enabled.  In this case, all elements must have the same shape.
      element_shape: (optional, default: None) A `TensorShape` object specifying
        the shape constraints of each of the elements of the TensorArray.
        Need not be fully defined.
      name: A name for the operation (optional).

    Raises:
      ValueError: if both handle and tensor_array_name are provided.
      TypeError: if handle is provided but is not a Tensor.
    """
    if handle is not None and tensor_array_name:
      raise ValueError(
          "Cannot construct with both handle and tensor_array_name")
    if handle is not None and not isinstance(handle, ops.Tensor):
      raise TypeError("Handle must be a Tensor")
    if handle is None and size is None:
      raise ValueError("Size must be provided if handle is not provided")
    if handle is not None and size is not None:
      raise ValueError("Cannot provide both a handle and size "
                       "at the same time")
    if handle is not None and element_shape is not None:
      raise ValueError("Cannot provide both a handle and element_shape "
                       "at the same time")
    if handle is not None and dynamic_size is not None:
      raise ValueError("Cannot provide both a handle and dynamic_size "
                       "at the same time")
    if handle is not None and clear_after_read is not None:
      raise ValueError("Cannot provide both a handle and clear_after_read "
                       "at the same time")

    if clear_after_read is None:
      clear_after_read = True
    dynamic_size = dynamic_size or False

    self._dtype = dtype
    # Record the current static shape for the array elements. The element
    # shape is defined either by `element_shape` or the shape of the tensor
    # of the first write. If `infer_shape` is true, every write is checked
    # for shape equality.
    if element_shape is None:
      self._infer_shape = infer_shape
      self._element_shape = []
    else:
      self._infer_shape = True
      self._element_shape = [tensor_shape.TensorShape(element_shape)]
    with ops.name_scope(name, "TensorArray", [handle, size, flow]) as scope:
      if handle is not None:
        self._handle = handle
      else:
        if flow is not None:
          with ops.colocate_with(flow):
            self._handle = gen_data_flow_ops._tensor_array_v2(
                dtype=dtype, size=size, element_shape=element_shape,
                dynamic_size=dynamic_size,
                clear_after_read=clear_after_read,
                tensor_array_name=tensor_array_name, name=scope)
        else:
          # Construct the TensorArray with an empty device.  The first
          # write into the TensorArray from a Tensor with a set device
          # will retroactively set the device value of this op.
          with ops.device(None), ops.colocate_with(None, ignore_existing=True):
            self._handle = gen_data_flow_ops._tensor_array_v2(
                dtype=dtype, size=size, element_shape=element_shape,
                dynamic_size=dynamic_size,
                clear_after_read=clear_after_read,
                tensor_array_name=tensor_array_name, name=scope)
      if flow is not None:
        self._flow = flow
      else:
        with ops.colocate_with(self._handle):
          self._flow = constant_op.constant(0, dtype=_dtypes.float32)
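For context, a minimal usage sketch of the public `tf.TensorArray` wrapper that this constructor backs (TF 2.x eager style; values chosen here for illustration):

import tensorflow as tf

ta = tf.TensorArray(dtype=tf.float32, size=3, element_shape=tf.TensorShape([2]))
for i in range(3):
    # write() returns a new TensorArray object carrying the updated flow.
    ta = ta.write(i, tf.fill([2], float(i)))
print(ta.stack().shape)  # (3, 2)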
Code Example #11
File: xla_ops_test.py Project: zzpmiracle/tensorflow
  def testPadShapeInference(self):
    a = array_ops.placeholder(np.float32, shape=(2, 3))

    c = xla.pad(
        a,
        padding_value=7,
        padding_low=[2, 1],
        padding_high=[1, 2],
        padding_interior=[1, 4])

    self.assertEqual(c.shape, tensor_shape.TensorShape([6, 14]))

    c = xla.pad(
        a,
        padding_value=7,
        padding_low=[2, -2],
        padding_high=[1, -2],
        padding_interior=[1, 2])

    self.assertEqual(c.shape, tensor_shape.TensorShape([6, 3]))

    c = xla.pad(
        array_ops.placeholder(np.float32, shape=(None, 2)),
        padding_value=7,
        padding_low=[0, 1],
        padding_high=[0, 2],
        padding_interior=[0, 4])
    self.assertEqual(c.shape.as_list(), [None, 9])

    # 0-sized input dimension and interior padding
    c = xla.pad(
        array_ops.placeholder(np.float32, shape=(2, 0)),
        padding_value=7,
        padding_low=[2, 1],
        padding_high=[1, 1],
        padding_interior=[1, 2])

    self.assertEqual(c.shape, tensor_shape.TensorShape([6, 2]))

    with self.assertRaisesRegex(
        ValueError, 'padding_value input must be scalar, found rank 1 '):
      xla.pad(
          a,
          padding_value=[0, 1],
          padding_low=[0, 0],
          padding_high=[0, 0],
          padding_interior=[0, 0])

    with self.assertRaisesRegex(ValueError,
                                'padding_low must be a 1D tensor of size 2 '):
      xla.pad(
          a,
          padding_value=7,
          padding_low=[0, 0, 0],
          padding_high=[0, 0],
          padding_interior=[0, 0])

    with self.assertRaisesRegex(ValueError,
                                'padding_high must be a 1D tensor of size 2 '):
      xla.pad(
          a,
          padding_value=7,
          padding_low=[0, 0],
          padding_high=[0, 0, 0],
          padding_interior=[0, 0])

    with self.assertRaisesRegex(
        ValueError, 'padding_interior must be a 1D tensor of size 2 '):
      xla.pad(
          a,
          padding_value=7,
          padding_low=[0, 0],
          padding_high=[0, 0],
          padding_interior=[0])

    with self.assertRaisesRegex(
        ValueError,
        'padding_interior must contain only non-negative values, found -2 '):
      xla.pad(
          a,
          padding_value=7,
          padding_low=[0, 0],
          padding_high=[0, 0],
          padding_interior=[-2, 0])

    with self.assertRaisesRegex(
        ValueError, 'resulting padded dimension has negative size -1 '):
      xla.pad(
          a,
          padding_value=7,
          padding_low=[-3, 0],
          padding_high=[0, 0],
          padding_interior=[0, 0])
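The expected shapes in this test follow the XLA pad-size formula: per dimension, output = padding_low + input + padding_high + max(input - 1, 0) * padding_interior. A small helper (illustrative only) reproducing the first assertion for the (2, 3) input:

def padded_dim(size, low, high, interior):
    # Interior padding inserts `interior` elements between existing neighbors.
    return low + size + high + max(size - 1, 0) * interior

print(padded_dim(2, 2, 1, 1))  # 6
print(padded_dim(3, 1, 2, 4))  # 14  -> TensorShape([6, 14])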
Code Example #12
    def _build_from_signature(self, query, value, key=None):
        """Builds layers and variables.

    Once the method is called, self._built_from_signature will be set to True.

    Args:
      query: query tensor or TensorShape.
      value: value tensor or TensorShape.
      key: key tensor or TensorShape.
    """
        self._built_from_signature = True
        if hasattr(query, "shape"):
            query_shape = tensor_shape.TensorShape(query.shape)
        else:
            query_shape = query
        if hasattr(value, "shape"):
            value_shape = tensor_shape.TensorShape(value.shape)
        else:
            value_shape = value
        if key is None:
            key_shape = value_shape
        elif hasattr(key, "shape"):
            key_shape = tensor_shape.TensorShape(key.shape)
        else:
            key_shape = key

        common_kwargs = dict(kernel_initializer=self._kernel_initializer,
                             bias_initializer=self._bias_initializer,
                             kernel_regularizer=self._kernel_regularizer,
                             bias_regularizer=self._bias_regularizer,
                             activity_regularizer=self._activity_regularizer,
                             kernel_constraint=self._kernel_constraint,
                             bias_constraint=self._bias_constraint)
        # Any setup work performed only once should happen in an `init_scope`
        # to avoid creating symbolic Tensors that will later pollute any eager
        # operations.
        with tf_utils.maybe_init_scope(self):
            free_dims = query_shape.rank - 1
            einsum_equation, bias_axes, output_rank = _build_proj_equation(
                free_dims, bound_dims=1, output_dims=2)
            self._query_dense = einsum_dense.EinsumDense(
                einsum_equation,
                output_shape=_get_output_shape(
                    output_rank - 1, [self._num_heads, self._key_dim]),
                bias_axes=bias_axes if self._use_bias else None,
                name="query",
                **common_kwargs)
            einsum_equation, bias_axes, output_rank = _build_proj_equation(
                key_shape.rank - 1, bound_dims=1, output_dims=2)
            self._key_dense = einsum_dense.EinsumDense(
                einsum_equation,
                output_shape=_get_output_shape(
                    output_rank - 1, [self._num_heads, self._key_dim]),
                bias_axes=bias_axes if self._use_bias else None,
                name="key",
                **common_kwargs)
            einsum_equation, bias_axes, output_rank = _build_proj_equation(
                value_shape.rank - 1, bound_dims=1, output_dims=2)
            self._value_dense = einsum_dense.EinsumDense(
                einsum_equation,
                output_shape=_get_output_shape(
                    output_rank - 1, [self._num_heads, self._value_dim]),
                bias_axes=bias_axes if self._use_bias else None,
                name="value",
                **common_kwargs)

            # Builds the attention computations for multi-head dot product attention.
            # These computations could be wrapped into the keras attention layer once
            # it supports multi-head einsum computations.
            self._build_attention(output_rank)
            if self._output_shape:
                if not isinstance(self._output_shape, collections.abc.Sized):
                    output_shape = [self._output_shape]
                else:
                    output_shape = self._output_shape
            else:
                output_shape = [query_shape[-1]]
            einsum_equation, bias_axes, output_rank = _build_proj_equation(
                free_dims, bound_dims=2, output_dims=len(output_shape))
            self._output_dense = einsum_dense.EinsumDense(
                einsum_equation,
                output_shape=_get_output_shape(output_rank - 1, output_shape),
                bias_axes=bias_axes if self._use_bias else None,
                name="attention_output",
                **common_kwargs)
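`_build_proj_equation` is a private Keras helper, but the kind of equation it emits can be illustrated directly with `tf.einsum`. The letters below are an assumption based on a rank-3 query of shape (batch, seq, dim) with free_dims=2, bound_dims=1, output_dims=2:

import tensorflow as tf

q = tf.random.normal([4, 10, 16])   # (batch, seq, dim)
w = tf.random.normal([16, 8, 32])   # (dim, num_heads, key_dim)
# Projection of the form "abc,cde->abde": contract dim, emit heads and key_dim.
out = tf.einsum("abc,cde->abde", q, w)
print(out.shape)  # (4, 10, 8, 32)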
Code Example #13
 def f():
   # `v` is a variable captured from the enclosing test scope.
   x = constant_op.constant([[1, 2], [3, 4]])
   out = math_ops.matmul(v, x)
   self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
Code Example #14
File: structure.py Project: vic-yes/tensorflow
 def _batch(self, batch_size):
   return SparseTensorStructure(
       self._dtype,
       tensor_shape.TensorShape([batch_size]).concatenate(self._dense_shape))
Code Example #15
    def from_string_handle(string_handle,
                           output_types,
                           output_shapes=None,
                           output_classes=None):
        """Creates a new, uninitialized `Iterator` based on the given handle.

    This method allows you to define a "feedable" iterator where you can choose
    between concrete iterators by feeding a value in a `tf.Session.run` call.
    In that case, `string_handle` would be a `tf.compat.v1.placeholder`, and you
    would feed it with the value of `tf.data.Iterator.string_handle` in each step.

    For example, if you had two iterators that marked the current position in
    a training dataset and a test dataset, you could choose which to use in
    each step as follows:

    ```python
    train_iterator = tf.data.Dataset(...).make_one_shot_iterator()
    train_iterator_handle = sess.run(train_iterator.string_handle())

    test_iterator = tf.data.Dataset(...).make_one_shot_iterator()
    test_iterator_handle = sess.run(test_iterator.string_handle())

    handle = tf.compat.v1.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_iterator.output_types)

    next_element = iterator.get_next()
    loss = f(next_element)

    train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle})
    test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle})
    ```

    Args:
      string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates to
        a handle produced by the `Iterator.string_handle()` method.
      output_types: A (nested) structure of `tf.DType` objects corresponding to
        each component of an element of this dataset.
      output_shapes: (Optional.) A (nested) structure of `tf.TensorShape`
        objects corresponding to each component of an element of this dataset.
        If omitted, each component will have an unconstrained shape.
      output_classes: (Optional.) A (nested) structure of Python `type` objects
        corresponding to each component of an element of this iterator. If
        omitted, each component is assumed to be of type `tf.Tensor`.

    Returns:
      An `Iterator`.
    """
        output_types = nest.map_structure(dtypes.as_dtype, output_types)
        if output_shapes is None:
            output_shapes = nest.map_structure(
                lambda _: tensor_shape.TensorShape(None), output_types)
        else:
            output_shapes = nest.map_structure_up_to(output_types,
                                                     tensor_shape.as_shape,
                                                     output_shapes)
        if output_classes is None:
            output_classes = nest.map_structure(lambda _: ops.Tensor,
                                                output_types)
        nest.assert_same_structure(output_types, output_shapes)
        output_structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)
        string_handle = ops.convert_to_tensor(string_handle,
                                              dtype=dtypes.string)
        iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
            string_handle,
            output_types=structure.get_flat_tensor_types(output_structure),
            output_shapes=structure.get_flat_tensor_shapes(output_structure))
        return Iterator(iterator_resource, None, output_types, output_shapes,
                        output_classes)
Code Example #16
  def __init__(self, params):
    """Construct PVSSTCell.

    Args:
      params: hyperparameters for PVSSTCell

    """
    super(PVSSTCell, self).__init__(params['name'])
    self._input_shape = params['input_shape']
    self._output_channels = params['output_channels']
    self._N_PV = params['N_PV']
    self._N_SST = params['N_SST']
    self._kernel_size = params['kernel_size']
    self._kernel_size_inh = params['kernel_size_inh']
    # kernel_size_inh is a list of kernel sizes for different connections;
    # the meaning of each element is defined here:
    self._kernel_size_pv_in = self._kernel_size_inh[0]
    self._kernel_size_sst_in = self._kernel_size_inh[1]
    self._kernel_size_fb = self._kernel_size_inh[2]
    self._kernel_size_hid = self._kernel_size_inh[3]
    # strides_fb is as calculated before in the file convinh_model.py
    self._strides_fb = params['strides_fb']
    self._strides = params['strides']
    # act_fn: string, activation function, e.g. 'gate_relu_cell_relu_kernel_abs'
    self._act_fn = params['act_fn']
    # normalize: string, specifying batch/layer normalization and its position
    self._normalize = params['normalize']
    # pvsst_circuit: string, eg: '','flip_sign','SstNoFF'
    self._pvsst_circuit = params['pvsst_circuit']
    # gating: string, gating mechanism, eg: 'in_mult_out_subt'
    self._gating = params['gating']
    self._data_format = params['data_format']
    self._skip_connection = False
    self._padding='SAME'
    self._total_output_channels = self._output_channels
    if self._skip_connection:
      self._total_output_channels += self._input_shape[-1]
    if self._skip_connection and (self._strides != 1):
      raise ValueError("stride should be 1 if skip_connection is True")   
    # shape calculation
    kernel_H = tf.Dimension(self._kernel_size)
    strides_H = tf.Dimension(self._strides)
    state_H = self._input_shape[1]
    if self._padding == 'VALID':
      state_H = state_H - kernel_H + 1
    state_H = (state_H + strides_H - 1) // strides_H
    state1_C = self._output_channels
    if ("remove_OG" in self._pvsst_circuit) and ("inside" not in self._normalize):
      state1_C = self._N_SST
    if self._data_format=='channels_last':
      state0_size = tensor_shape.TensorShape(
          [state_H, state_H] + [self._output_channels])
      state1_size = tensor_shape.TensorShape(
          [state_H, state_H] + [state1_C])
      self._state_size = rnn_cell_impl.LSTMStateTuple(state0_size, state1_size)
      self._output_size = tensor_shape.TensorShape(
          [state_H, state_H] + [self._total_output_channels])
    elif self._data_format=='channels_first':
      state0_size = tensor_shape.TensorShape(
          [self._output_channels] + [state_H, state_H])
      state1_size = tensor_shape.TensorShape(
          [state1_C] + [state_H, state_H])
      self._state_size = rnn_cell_impl.LSTMStateTuple(state0_size, state1_size)
      self._output_size = tensor_shape.TensorShape(
          [self._total_output_channels] + [state_H, state_H])
    else:
      raise ValueError("data_format not valid: {}".format(self._data_format)) 
Code Example #17
    def _sample_n(self, n, seed=None):
        if self._use_static_graph:
            # This sampling approach is almost the same as the approach used by
            # `MixtureSameFamily`. The differences are due to having a list of
            # `Distribution` objects rather than a single object, and maintaining
            # random seed management that is consistent with the non-static code path.
            samples = []
            cat_samples = self.cat.sample(n, seed=seed)
            for c in range(self.num_components):
                seed = distribution_util.gen_new_seed(seed, "mixture")
                samples.append(self.components[c].sample(n, seed=seed))
            x = array_ops.stack(samples, -self._static_event_shape.ndims -
                                1)  # [n, B, k, E]
            npdt = x.dtype.as_numpy_dtype
            mask = array_ops.one_hot(
                indices=cat_samples,  # [n, B]
                depth=self._num_components,  # == k
                on_value=np.ones([], dtype=npdt),
                off_value=np.zeros([], dtype=npdt))  # [n, B, k]
            mask = distribution_util.pad_mixture_dimensions(
                mask, self, self._cat,
                self._static_event_shape.ndims)  # [n, B, k, [1]*e]
            return math_ops.reduce_sum(
                x * mask,
                axis=-1 - self._static_event_shape.ndims)  # [n, B, E]

        with ops.control_dependencies(self._assertions):
            n = ops.convert_to_tensor(n, name="n")
            static_n = tensor_util.constant_value(n)
            n = int(static_n) if static_n is not None else n
            cat_samples = self.cat.sample(n, seed=seed)

            static_samples_shape = cat_samples.get_shape()
            if static_samples_shape.is_fully_defined():
                samples_shape = static_samples_shape.as_list()
                samples_size = static_samples_shape.num_elements()
            else:
                samples_shape = array_ops.shape(cat_samples)
                samples_size = array_ops.size(cat_samples)
            static_batch_shape = self.batch_shape
            if static_batch_shape.is_fully_defined():
                batch_shape = static_batch_shape.as_list()
                batch_size = static_batch_shape.num_elements()
            else:
                batch_shape = self.batch_shape_tensor()
                batch_size = math_ops.reduce_prod(batch_shape)
            static_event_shape = self.event_shape
            if static_event_shape.is_fully_defined():
                event_shape = np.array(static_event_shape.as_list(),
                                       dtype=np.int32)
            else:
                event_shape = self.event_shape_tensor()

            # Get indices into the raw cat sampling tensor. We will
            # need these to stitch sample values back out after sampling
            # within the component partitions.
            samples_raw_indices = array_ops.reshape(
                math_ops.range(0, samples_size), samples_shape)

            # Partition the raw indices so that we can use
            # dynamic_stitch later to reconstruct the samples from the
            # known partitions.
            partitioned_samples_indices = data_flow_ops.dynamic_partition(
                data=samples_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)

            # Copy the batch indices n times, as we will need to know
            # these to pull out the appropriate rows within the
            # component partitions.
            batch_raw_indices = array_ops.reshape(
                array_ops.tile(math_ops.range(0, batch_size), [n]),
                samples_shape)

            # Explanation of the dynamic partitioning below:
            #   batch indices are, e.g., [0, 1, 0, 1, 0, 1]
            # Suppose partitions are:
            #     [1 1 0 0 1 1]
            # After partitioning, batch indices are cut as:
            #     [batch_indices[x] for x in 2, 3]
            #     [batch_indices[x] for x in 0, 1, 4, 5]
            # i.e.
            #     [1 1] and [0 0 0 0]
            # Now we sample n=2 from part 0 and n=4 from part 1.
            # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
            # and for part 1 we want samples from batch entries 0, 0, 0, 0
            #   (samples 0, 1, 2, 3).
            partitioned_batch_indices = data_flow_ops.dynamic_partition(
                data=batch_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)
            samples_class = [None for _ in range(self.num_components)]

            for c in range(self.num_components):
                n_class = array_ops.size(partitioned_samples_indices[c])
                seed = distribution_util.gen_new_seed(seed, "mixture")
                samples_class_c = self.components[c].sample(n_class, seed=seed)

                # Pull out the correct batch entries from each index.
                # To do this, we may have to flatten the batch shape.

                # For sample s, batch element b of component c, we get the
                # partitioned batch indices from
                # partitioned_batch_indices[c]; and shift each element by
                # the sample index. The final lookup can be thought of as
                # a matrix gather along locations (s, b) in
                # samples_class_c where the n_class rows correspond to
                # samples within this component and the batch_size columns
                # correspond to batch elements within the component.
                #
                # Thus the lookup index is
                #   lookup[c, i] = batch_size * s[i] + b[c, i]
                # for i = 0 ... n_class[c] - 1.
                lookup_partitioned_batch_indices = (
                    batch_size * math_ops.range(n_class) +
                    partitioned_batch_indices[c])
                samples_class_c = array_ops.reshape(
                    samples_class_c,
                    array_ops.concat([[n_class * batch_size], event_shape], 0))
                samples_class_c = array_ops.gather(
                    samples_class_c,
                    lookup_partitioned_batch_indices,
                    name="samples_class_c_gather")
                samples_class[c] = samples_class_c

            # Stitch back together the samples across the components.
            lhs_flat_ret = data_flow_ops.dynamic_stitch(
                indices=partitioned_samples_indices, data=samples_class)
            # Reshape back to proper sample, batch, and event shape.
            ret = array_ops.reshape(
                lhs_flat_ret,
                array_ops.concat(
                    [samples_shape, self.event_shape_tensor()], 0))
            ret.set_shape(
                tensor_shape.TensorShape(static_samples_shape).concatenate(
                    self.event_shape))
            return ret
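The partition-and-stitch round trip used above can be seen in isolation. A minimal numeric sketch with toy values (TF 2.x eager; mirrors the comment block about partitions [1 1 0 0 1 1]):

import tensorflow as tf

data = tf.constant([10, 11, 12, 13, 14, 15])
partitions = tf.constant([1, 1, 0, 0, 1, 1])

# Split rows by component, remembering where each row came from.
indices = tf.dynamic_partition(tf.range(6), partitions, num_partitions=2)
parts = tf.dynamic_partition(data, partitions, num_partitions=2)
print([p.numpy().tolist() for p in parts])  # [[12, 13], [10, 11, 14, 15]]

# dynamic_stitch inverts the partition, restoring the original order.
print(tf.dynamic_stitch(indices, parts).numpy().tolist())
# [10, 11, 12, 13, 14, 15]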
Code Example #18
 def _batch_shape(self):
     return tensor_shape.TensorShape(None)
Code Example #19
    def __init__(self,
                 max_tokens,
                 num_oov_indices,
                 mask_token,
                 oov_token,
                 vocabulary=None,
                 invert=False,
                 output_mode=INT,
                 sparse=False,
                 pad_to_max_tokens=False,
                 **kwargs):
        # If max_tokens is set, the value must be greater than 1 - otherwise we
        # are creating a 0-element vocab, which doesn't make sense.
        if max_tokens is not None and max_tokens <= 1:
            raise ValueError("If set, `max_tokens` must be greater than 1. "
                             "You passed {}".format(max_tokens))

        if num_oov_indices < 0:
            raise ValueError(
                "`num_oov_indices` must be greater than or equal to 0. "
                "You passed {}".format(num_oov_indices))

        # Support deprecated names for output_modes.
        if output_mode == "binary":
            output_mode = MULTI_HOT
        if output_mode == "tf-idf":
            output_mode = TF_IDF
        # 'output_mode' must be one of (INT, MULTI_HOT, COUNT, TF_IDF)
        layer_utils.validate_string_arg(output_mode,
                                        allowable_strings=(INT, MULTI_HOT,
                                                           COUNT, TF_IDF),
                                        layer_name=self.__class__.__name__,
                                        arg_name="output_mode")

        if invert and output_mode != INT:
            raise ValueError(
                "`output_mode` must be {} when `invert` is true. You "
                "passed {}".format(INT, output_mode))

        self.invert = invert
        self.max_tokens = max_tokens
        self.num_oov_indices = num_oov_indices
        self.oov_token = oov_token
        self.output_mode = output_mode
        self.sparse = sparse
        self.pad_to_max_tokens = pad_to_max_tokens
        self._called = False

        # A note on vocab_size: we need to always keep a non-Tensor representation
        # of vocab_size around to use in graph building. Because we might be
        # in a tf.function, we can't rely on evaluating the actual tables to
        # find the value either.
        self._vocab_size = None
        # We need to keep track our current vocab size outside of our layer weights
        # to support a static output shape when `output_mode != INT`. The bincount
        # ops do not set shape on their outputs, which means we have to set it
        # ourselves. We persist the current vocab size as a hidden part of the
        # config when serializing our model.
        if "vocabulary_size" in kwargs:
            self._vocab_size = kwargs["vocabulary_size"]
            del kwargs["vocabulary_size"]

        restore_from_static_table = kwargs.pop("has_static_table", False)

        # Make sure the mask token is truly of the dtype we want. We can ignore
        # strings here, because they have only one dtype.
        if mask_token is not None:
            dtype = kwargs["dtype"]
            if dtype == dtypes.int32:
                mask_token = np.int32(mask_token)
            elif dtype == dtypes.int64:
                mask_token = np.int64(mask_token)
        self.mask_token = mask_token

        if max_tokens is not None:
            available_vocab_size = max_tokens - self._token_start_index()
        else:
            available_vocab_size = None

        super(IndexLookup, self).__init__(combiner=_IndexLookupCombiner(
            vocab_size=available_vocab_size,
            mask_value=mask_token,
            oov_value=oov_token,
            compute_idf=(output_mode == TF_IDF)),
                                          **kwargs)

        # We need to save the key dtype so that we know if we're expecting int64
        # keys. If we are, we will cast int32 inputs to int64 as well.
        if invert:
            self._key_dtype = dtypes.int64
            self._value_dtype = self.dtype
            self._mask_key = 0
            self._mask_value = mask_token
            key_index = lookup_ops.TextFileIndex.LINE_NUMBER
            value_index = lookup_ops.TextFileIndex.WHOLE_LINE
            default_value = self.oov_token
            oov_indices = None
        else:
            self._key_dtype = self.dtype
            self._value_dtype = dtypes.int64
            self._mask_key = mask_token
            key_index = lookup_ops.TextFileIndex.WHOLE_LINE
            value_index = lookup_ops.TextFileIndex.LINE_NUMBER
            # Masks should map to 0 for int output and be dropped otherwise. Max ints
            # will be dropped from the bincount op.
            self._mask_value = 0 if self.output_mode == INT else dtypes.int64.max
            oov_start = self._oov_start_index()
            token_start = self._token_start_index()
            if self.num_oov_indices == 0:
                # If there are no OOV indices, we map OOV tokens to -1 for int output
                # and drop them from bagged output. Max ints will be dropped from the
                # bincount op.
                default_value = -1 if self.output_mode == INT else dtypes.int64.max
                oov_indices = None
            elif self.num_oov_indices == 1:
                # If there is only one OOV index, we can set that index as the default
                # value of the index_lookup table.
                default_value = oov_start
                oov_indices = None
            else:
                # If we have multiple OOV values, we need to do a further hashing step;
                # to make this easier, we set the OOV value to -1. (This lets us do a
                # vectorized add and cast to boolean to determine locations where we
                # need to do extra hashing.)
                default_value = -1
                oov_indices = list(range(oov_start, token_start))

        self._static_vocabulary_path = None
        has_vocab_path = (vocabulary is not None
                          and isinstance(vocabulary, str))
        if has_vocab_path or restore_from_static_table:
            self._has_static_table = True
            if vocabulary is None:
                # If we're restoring a layer that was saved with a static table
                # initializer, we create a fake initializer object to let the code
                # progress. The savedmodel restoration code will handle restoring
                # the actual data.
                initializer = _NullInitializer(self._key_dtype,
                                               self._value_dtype)
            else:
                if not gfile.Exists(vocabulary):
                    raise ValueError("Vocabulary file %s does not exist." %
                                     (vocabulary, ))
                self._static_vocabulary_path = vocabulary
                num_tokens = table_utils.num_tokens_in_file(vocabulary)
                self._vocab_size = self._token_start_index() + num_tokens

                initializer = lookup_ops.TextFileInitializer(
                    filename=vocabulary,
                    key_dtype=self._key_dtype,
                    key_index=key_index,
                    value_dtype=self._value_dtype,
                    value_index=value_index,
                    value_index_offset=self._token_start_index())

            self._table = lookup_ops.StaticHashTable(
                initializer, default_value=default_value)
            self._table_handler = table_utils.TableHandler(
                table=self._table,
                mask_token=self._mask_key,
                mask_value=self._mask_value,
                oov_tokens=oov_indices)

            tracked_table = self._add_trackable(self._table, trainable=False)

        else:
            self._has_static_table = False
            self._table = lookup_ops.MutableHashTable(
                key_dtype=self._key_dtype,
                value_dtype=self._value_dtype,
                default_value=default_value,
                name=(self._name + "_index_table"))
            self._table_handler = table_utils.TableHandler(
                table=self._table, oov_tokens=oov_indices)
            if vocabulary is not None:
                self.set_vocabulary(vocabulary)
            tracked_table = self._add_trackable(self._table, trainable=False)

        if self.output_mode == TF_IDF:
            # The TF-IDF weight may have a (None,) TensorShape. This creates
            # a 1D variable with arbitrary shape, which we can assign any weight to
            # so long as it has 1 dimension. In order to properly initialize this
            # weight in Keras, we need to provide a custom callable initializer which
            # does not depend on the shape of the weight (as all other initializers
            # do) since the weight is not known. Hence the lambda shape, dtype: [0].
            if not self.pad_to_max_tokens or max_tokens is None:
                initializer = lambda shape, dtype: [0]
            else:
                initializer = init_ops.zeros_initializer

            # We are adding these here instead of in build() since they do not depend
            # on the input shape at all.
            idf_shape = (max_tokens, ) if self.pad_to_max_tokens else (None, )
            self.tf_idf_weights = self._add_state_variable(
                name="idf",
                shape=tensor_shape.TensorShape(idf_shape),
                dtype=backend.floatx(),
                initializer=initializer)

        # This is a workaround for summary() on this layer. Because the table is
        # not mutable during training, the effective number of parameters (and so
        # the weight shape) is 0; we add this as an attr so that the parameter
        # counting code in the Model object doesn't throw an attribute error.
        tracked_table.shape = tensor_shape.TensorShape((0, ))
Code Example #20
 def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     return tensor_shape.TensorShape(
         [input_shape[0], self.n, input_shape[1]])
Code Example #21
 def get_shape(self):
     return tensor_shape.TensorShape(self._stored_shape)
Code Example #22
 def convert_to_batch_shape(s):
     # Prepend a 1 for the batch dimension; for recurrent
     # variational dropout we use the same dropout mask for all
     # batch elements.
     return array_ops.concat(
         ([1], tensor_shape.TensorShape(s).as_list()), 0)
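Applied to a concrete shape, the helper simply prepends a batch dimension of 1, so one dropout mask broadcasts across all batch elements. The same logic against the public TF 2.x API (a sketch, not the original module's code):

import tensorflow as tf

def convert_to_batch_shape(s):
    # [5, 8] -> [1, 5, 8]; the leading 1 broadcasts over the batch.
    return tf.concat(([1], tf.TensorShape(s).as_list()), 0)

print(convert_to_batch_shape([5, 8]).numpy())  # [1 5 8]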
Code Example #23
 def testMakeAttrShapeList(self):
     shape_list = [[], None, [1, 2, 3], [None, None], [1, None, 3]]
     self.assertEqual(
         [tensor_shape.TensorShape(s).as_proto() for s in shape_list],
         backprop.make_attr([pywrap_tensorflow.TF_ATTR_SHAPE], shape_list))
Code Example #24
File: cudnn_rnn.py Project: heiheiya/myTF
    def build(self, input_shape):
        """Create variables of the Cudnn RNN.

    It can be called manually before `__call__()` or automatically through
    `__call__()`. In the former case, subsequent `__call__()`s will skip
    creating variables.
    Args:
      input_shape: network input tensor shape, a python list or a TensorShape
        object with 3 dimensions.
    Raises:
      ValueError: if input_shape has wrong dimension or unknown 3rd dimension.
    """
        if self.built:
            return

        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape.ndims != 3:
            raise ValueError("Expecting input_shape with 3 dims, got %d" %
                             input_shape.ndims)
        if input_shape[-1].value is None:
            raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                             "should be defined. Found `None`.")
        self._input_size = input_shape[-1].value
        self.input_spec = input_spec.InputSpec(ndim=3,
                                               axes={-1: self._input_size})

        self._set_scope(None)

        # Not using the base class `add_variable()` since it calls
        # `tf.get_variable()` with a callable initializer, whereas here the
        # initializer is a tensor. The difference is mandated to support
        # forward compatibility with Cudnn.
        with vs.variable_scope(self._scope,
                               reuse=self.built,
                               custom_getter=self._update_trainable_weights):
            if self._kernel_initializer is None:
                self._kernel_initializer = init_ops.glorot_uniform_initializer(
                    seed=self._seed, dtype=self._plain_dtype)
            if self._bias_initializer is None:
                self._bias_initializer = init_ops.constant_initializer(
                    0.0, dtype=self._plain_dtype)

            weights = [
                self._kernel_initializer(sp, dtype=self._plain_dtype)
                for sp in self.canonical_weight_shapes
            ]
            biases = [
                self._bias_initializer(sp, dtype=self._plain_dtype)
                for sp in self.canonical_bias_shapes
            ]
            opaque_params_t = self._canonical_to_opaque(weights, biases)

            if vs.get_variable_scope().partitioner is not None:
                logging.warn(
                    "Partitioner is not supported for Cudnn RNN layer variables, using "
                    "it will create forward-compatibility issues with future "
                    "CUDA/CuDNN generations.")
            # Initialize opaque params with a tensor of unknown shape, so we
            # can't use self.add_variable(name, shape, initializer, ...)
            self.kernel = vs.get_variable("opaque_kernel",
                                          dtype=self._plain_dtype,
                                          initializer=opaque_params_t,
                                          validate_shape=False)
        # Create saveable in the outer scope of the cudnn subgraph, such that
        # alternative subgraph with platform-independent rnn cells can load the
        # checkpoints directly.
        if not (self.built or vs.get_variable_scope().reuse is True):
            self._create_saveable()
        self.built = True
Code Example #25
File: constant_op.py Project: zyfewq/tensorflow
def _ConstantShape(op):
    return [
        tensor_shape.TensorShape(
            [d.size for d in op.get_attr("value").tensor_shape.dim])
    ]
Code Example #26
File: tpu_strategy.py Project: yzhuo1992/tensorflow
    def tpu_function(args, kwargs):
      """TF Function used to replicate the user computation."""
      if kwargs is None:
        kwargs = {}

      # Remove None at the end of args as they are not replicatable
      # If there are None in the middle we can't do anything about it
      # so let those cases fail.
      # For example when Keras model predict is used they pass the targets as
      # None. We want to handle it here so all client libraries don't have to
      # do this as other strategies can handle None values better.
      while args and args[-1] is None:
        args = args[:-1]

      # Used to re-structure flattened output tensors from `tpu.replicate()`
      # into a structured format.
      result = [[]]

      def replicated_fn(replica_id, replica_args, replica_kwargs):
        """Wraps user function to provide replica ID and `Tensor` inputs."""
        with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
          result[0] = fn(*replica_args, **replica_kwargs)
        return result[0]

      replicate_inputs = []  # By replica.
      for i in range(strategy.num_replicas_in_sync):
        replicate_inputs.append(
            [constant_op.constant(i, dtype=dtypes.int32),
             distribute_utils.select_replica(i, args),
             distribute_utils.select_replica(i, kwargs)])

      # Construct and pass `maximum_shapes` so that we could support dynamic
      # shapes using dynamic padder.
      if options.experimental_enable_dynamic_batch_size and replicate_inputs:
        maximum_shapes = []
        flattened_list = nest.flatten(replicate_inputs[0])
        for input_tensor in flattened_list:
          if tensor_util.is_tensor(input_tensor):
            rank = input_tensor.get_shape().rank
          else:
            rank = np.ndim(input_tensor)
          maximum_shape = tensor_shape.TensorShape([None] * rank)
          maximum_shapes.append(maximum_shape)
        maximum_shapes = nest.pack_sequence_as(replicate_inputs[0],
                                               maximum_shapes)
      else:
        maximum_shapes = None

      if options.experimental_bucketizing_dynamic_shape:
        padding_spec = tpu.PaddingSpec.POWER_OF_TWO
      else:
        padding_spec = None

      with strategy.scope():
        replicate_outputs = tpu.replicate(
            replicated_fn,
            replicate_inputs,
            device_assignment=self._device_assignment,
            maximum_shapes=maximum_shapes,
            padding_spec=padding_spec)

      # Remove all no-ops that may have been added during `tpu.replicate()`.
      if isinstance(result[0], list):
        result[0] = [
            output for output in result[0] if not isinstance(
                output, ops.Operation)
        ]

      # Workaround for `tpu.replicate` behaviour when single `Tensor` returned.
      if result[0] is None or isinstance(result[0], ops.Operation):
        replicate_outputs = [None] * len(replicate_outputs)
      else:
        replicate_outputs = [
            nest.pack_sequence_as(result[0], nest.flatten(replica_output))
            for replica_output in replicate_outputs
        ]
      return distribute_utils.regroup(replicate_outputs)
Code Example #27
File: readers.py Project: Utsal20/poGANmon
 def output_shapes(self):
     return nest.map_structure(lambda _: tensor_shape.TensorShape([]),
                               self._output_types)
Code Example #28
    def from_structure(output_types,
                       output_shapes=None,
                       shared_name=None,
                       output_classes=None):
        """Creates a new, uninitialized `Iterator` with the given structure.

    This iterator-constructing method can be used to create an iterator that
    is reusable with many different datasets.

    The returned iterator is not bound to a particular dataset, and it has
    no `initializer`. To initialize the iterator, run the operation returned by
    `Iterator.make_initializer(dataset)`.

    The following is an example:

    ```python
    iterator = Iterator.from_structure(tf.int64, tf.TensorShape([]))

    dataset_range = Dataset.range(10)
    range_initializer = iterator.make_initializer(dataset_range)

    dataset_evens = dataset_range.filter(lambda x: x % 2 == 0)
    evens_initializer = iterator.make_initializer(dataset_evens)

    # Define a model based on the iterator; in this example, the model_fn
    # is expected to take scalar tf.int64 Tensors as input (see
    # the definition of 'iterator' above).
    prediction, loss = model_fn(iterator.get_next())

    # Train for `num_epochs`, where for each epoch, we first iterate over
    # dataset_range, and then iterate over dataset_evens.
    for _ in range(num_epochs):
      # Initialize the iterator to `dataset_range`
      sess.run(range_initializer)
      while True:
        try:
          pred, loss_val = sess.run([prediction, loss])
        except tf.errors.OutOfRangeError:
          break

      # Initialize the iterator to `dataset_evens`
      sess.run(evens_initializer)
      while True:
        try:
          pred, loss_val = sess.run([prediction, loss])
        except tf.errors.OutOfRangeError:
          break
    ```

    Args:
      output_types: A (nested) structure of `tf.DType` objects corresponding to
        each component of an element of this dataset.
      output_shapes: (Optional.) A (nested) structure of `tf.TensorShape`
        objects corresponding to each component of an element of this dataset.
        If omitted, each component will have an unconstrained shape.
      shared_name: (Optional.) If non-empty, this iterator will be shared under
        the given name across multiple sessions that share the same devices
        (e.g. when using a remote server).
      output_classes: (Optional.) A (nested) structure of Python `type` objects
        corresponding to each component of an element of this iterator. If
        omitted, each component is assumed to be of type `tf.Tensor`.

    Returns:
      An `Iterator`.

    Raises:
      TypeError: If the structures of `output_shapes` and `output_types` are
        not the same.
    """
        output_types = nest.map_structure(dtypes.as_dtype, output_types)
        if output_shapes is None:
            output_shapes = nest.map_structure(
                lambda _: tensor_shape.TensorShape(None), output_types)
        else:
            output_shapes = nest.map_structure_up_to(output_types,
                                                     tensor_shape.as_shape,
                                                     output_shapes)
        if output_classes is None:
            output_classes = nest.map_structure(lambda _: ops.Tensor,
                                                output_types)
        nest.assert_same_structure(output_types, output_shapes)
        output_structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)
        if shared_name is None:
            shared_name = ""
        iterator_resource = gen_dataset_ops.iterator_v2(
            container="",
            shared_name=shared_name,
            output_types=structure.get_flat_tensor_types(output_structure),
            output_shapes=structure.get_flat_tensor_shapes(output_structure))
        return Iterator(iterator_resource, None, output_types, output_shapes,
                        output_classes)
Code Example #29
    def build(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape)
        if not input_shape.ndims:
            raise ValueError('Input has undefined rank:', input_shape)
        ndims = len(input_shape)

        # Convert axis to list and resolve negatives
        if isinstance(self.axis, int):
            self.axis = [self.axis]

        for idx, x in enumerate(self.axis):
            if x < 0:
                self.axis[idx] = ndims + x

        # Validate axes
        for x in self.axis:
            if x < 0 or x >= ndims:
                raise ValueError('Invalid axis: %d' % x)
        if len(self.axis) != len(set(self.axis)):
            raise ValueError('Duplicate axis: %s' % self.axis)

        if self.virtual_batch_size is not None:
            if self.virtual_batch_size <= 0:
                raise ValueError(
                    'virtual_batch_size must be a positive integer that '
                    'divides the true batch size of the input Tensor')
            # If using virtual batches, the first dimension must be the batch
            # dimension and cannot be the batch norm axis
            if 0 in self.axis:
                raise ValueError(
                    'When using virtual_batch_size, the batch dimension '
                    'must be 0 and thus axis cannot include 0')
            if self.adjustment is not None:
                raise ValueError(
                    'When using virtual_batch_size, adjustment cannot '
                    'be specified')

        if self.fused in (None, True):
            # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the
            # output back to its original shape accordingly.
            if self._USE_V2_BEHAVIOR:
                if self.fused is None:
                    self.fused = (ndims == 4)
                elif self.fused and ndims != 4:
                    raise ValueError(
                        'Batch normalization layers with fused=True only '
                        'support 4D input tensors.')
            else:
                assert self.fused is not None
                self.fused = (ndims == 4 and self._fused_can_be_used())
            # TODO(chrisying): fused batch norm is currently not supported for
            # multi-axis batch norm and by extension virtual batches. In some cases,
            # it might be possible to use fused batch norm but would require reshaping
            # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is
            # particularly tricky. A compromise might be to just support the most
            # common use case (turning 5D w/ virtual batch to NCHW)

        if self.fused:
            if self.axis == [1]:
                self._data_format = 'NCHW'
            elif self.axis == [3]:
                self._data_format = 'NHWC'
            else:
                raise ValueError(
                    'Unsupported axis, fused batch norm only supports '
                    'axis == [1] or axis == [3]')

        # Promote fp16/bfloat16 batch norm parameters to fp32 for stability.
        if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16:
            param_dtype = dtypes.float32
        else:
            param_dtype = self.dtype or dtypes.float32

        axis_to_dim = {x: input_shape.dims[x].value for x in self.axis}
        for x in axis_to_dim:
            if axis_to_dim[x] is None:
                raise ValueError(
                    'Input has undefined `axis` dimension %d. Input shape: %s'
                    % (x, input_shape))
        self.input_spec = InputSpec(ndim=ndims, axes=axis_to_dim)

        if len(axis_to_dim) == 1 and self.virtual_batch_size is None:
            # Single axis batch norm (most common/default use-case)
            param_shape = (list(axis_to_dim.values())[0],)
        else:
            # Parameter shape is the original shape but with 1 in all non-axis dims
            param_shape = [
                axis_to_dim[i] if i in axis_to_dim else 1 for i in range(ndims)
            ]
            if self.virtual_batch_size is not None:
                # When using virtual batches, add an extra dim at index 1
                param_shape.insert(1, 1)
                for idx, x in enumerate(self.axis):
                    self.axis[idx] = x + 1  # Account for added dimension

        if self.scale:
            self.gamma = self.add_weight(name='gamma',
                                         shape=param_shape,
                                         dtype=param_dtype,
                                         initializer=self.gamma_initializer,
                                         regularizer=self.gamma_regularizer,
                                         constraint=self.gamma_constraint,
                                         trainable=True)
        else:
            self.gamma = None
            if self.fused:
                self._gamma_const = array_ops.constant(1.0,
                                                       dtype=param_dtype,
                                                       shape=param_shape)

        if self.center:
            self.beta = self.add_weight(name='beta',
                                        shape=param_shape,
                                        dtype=param_dtype,
                                        initializer=self.beta_initializer,
                                        regularizer=self.beta_regularizer,
                                        constraint=self.beta_constraint,
                                        trainable=True)
        else:
            self.beta = None
            if self.fused:
                self._beta_const = array_ops.constant(0.0,
                                                      dtype=param_dtype,
                                                      shape=param_shape)

        try:
            # Disable variable partitioning when creating the moving mean and variance
            if hasattr(self, '_scope') and self._scope:
                partitioner = self._scope.partitioner
                self._scope.set_partitioner(None)
            else:
                partitioner = None
            self.moving_mean = self.add_weight(
                name='moving_mean',
                shape=param_shape,
                dtype=param_dtype,
                initializer=self.moving_mean_initializer,
                synchronization=tf_variables.VariableSynchronization.ON_READ,
                trainable=False,
                aggregation=tf_variables.VariableAggregation.MEAN)

            self.moving_variance = self.add_weight(
                name='moving_variance',
                shape=param_shape,
                dtype=param_dtype,
                initializer=self.moving_variance_initializer,
                synchronization=tf_variables.VariableSynchronization.ON_READ,
                trainable=False,
                aggregation=tf_variables.VariableAggregation.MEAN)

            if self.renorm:
                # Create variables to maintain the moving mean and standard
                # deviation. These are used in training and thus are different
                # from the moving averages above. The renorm variables are
                # colocated with moving_mean and moving_variance via the
                # `colocate_vars_with` blocks below.
                def _renorm_variable(name, shape):
                    var = self.add_weight(
                        name=name,
                        shape=shape,
                        dtype=param_dtype,
                        initializer=init_ops.zeros_initializer(),
                        synchronization=(
                            tf_variables.VariableSynchronization.ON_READ),
                        trainable=False,
                        aggregation=tf_variables.VariableAggregation.MEAN)
                    return var

                with distribution_strategy_context.get_distribution_strategy(
                ).colocate_vars_with(self.moving_mean):
                    self.renorm_mean = _renorm_variable(
                        'renorm_mean', param_shape)
                    self.renorm_mean_weight = _renorm_variable(
                        'renorm_mean_weight', ())
                # We initialize renorm_stddev to 0, and maintain the (0-initialized)
                # renorm_stddev_weight. This allows us to (1) mix the average
                # stddev with the minibatch stddev early in training, and (2) compute
                # the unbiased average stddev by dividing renorm_stddev by the weight.
                with distribution_strategy_context.get_distribution_strategy(
                ).colocate_vars_with(self.moving_variance):
                    self.renorm_stddev = _renorm_variable(
                        'renorm_stddev', param_shape)
                    self.renorm_stddev_weight = _renorm_variable(
                        'renorm_stddev_weight', ())
        finally:
            if partitioner:
                self._scope.set_partitioner(partitioner)
        self.built = True
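To make the shape bookkeeping in build() concrete, here is a hedged sketch, not part of the layer itself, of how a channels-last 4D input resolves axis=-1 to a per-channel parameter shape; the input shape and axis are assumptions for the common case:

# Hedged illustration of the axis/param_shape logic in build(); the shape
# [None, 32, 32, 16] and axis=[-1] are hypothetical inputs.
from tensorflow.python.framework import tensor_shape

input_shape = tensor_shape.TensorShape([None, 32, 32, 16])
ndims = len(input_shape)                                    # 4
axis = [ndims + x if x < 0 else x for x in [-1]]            # [-1] -> [3]
axis_to_dim = {x: input_shape.dims[x].value for x in axis}  # {3: 16}
param_shape = (list(axis_to_dim.values())[0],)
assert param_shape == (16,)  # gamma/beta hold one scalar per channel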
Code example #30
 @property
 def output_size(self):
     # Return the structure of the cell output and the sampled token id.
     return BasicDecoderOutput(rnn_output=self._rnn_output_size(),
                               sample_id=tensor_shape.TensorShape([]))
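As a brief aside (an illustration, not from the decoder source): `TensorShape([])` denotes a known rank-0, i.e. scalar, shape for each sampled id, which is distinct from `TensorShape(None)`, where the rank itself is unknown:

# TensorShape([]) is a known scalar shape; TensorShape(None) is fully unknown.
from tensorflow.python.framework import tensor_shape

scalar = tensor_shape.TensorShape([])
assert scalar.ndims == 0           # rank 0: one id per decoded step
assert scalar.num_elements() == 1

unknown = tensor_shape.TensorShape(None)
assert unknown.ndims is None       # rank is not known at all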