Example #1
  def create_state_var(self, name, initial_value=None, data_shape=None, choice_dependent=None):
    """
    A state var is a variable whose initial value is given by the encoder or a constant,
    and whose final value is determined by one step of this rec layer (usually the decoder).

    :param str name:
    :param tf.Tensor|None initial_value: assumes batch-major, if data_shape is not given
    :param Data|None data_shape:
    :param bool|None choice_dependent: whether the var depends on the beam choices; None if not relevant
    :rtype: tf.Tensor
    """
    assert name not in self.state_vars
    assert data_shape or initial_value is not None
    if data_shape:
      assert isinstance(data_shape, Data)
    elif initial_value.shape.ndims == 0:
      data_shape = Data(name=name, batch_dim_axis=None, shape=(), dtype=initial_value.dtype.name)
    else:
      assert initial_value.shape.dims[0].value is None  # first is batch dim
      data_shape = Data(
        name=name, batch_dim_axis=0, shape=initial_value.shape.as_list()[1:], dtype=initial_value.dtype.name)
    if initial_value is not None:
      # initial_value might have dim 1 in variable dimensions (which are not the batch-dim-axis),
      # see get_rec_initial_output, which should be fine for broadcasting.
      initial_value.set_shape(data_shape.batch_shape)
    var = self.StateVar(
      parent=self, name=name, initial_value=initial_value,
      data_shape=data_shape, choice_dependent=choice_dependent)
    self.state_vars[name] = var
    return var.read()
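A quick standalone illustration of how the Data template above is derived from initial_value; the tensors here are made up, and this assumes TF1, like the snippets on this page:

import tensorflow as tf

scalar_init = tf.constant(0.0)                          # ndims == 0 -> Data(shape=(), batch_dim_axis=None)
batched_init = tf.placeholder(tf.float32, (None, 512))  # first axis must be the batch dim
print(scalar_init.shape.ndims)             # 0
print(batched_init.shape.dims[0].value)    # None, i.e. dynamic batch
print(batched_init.shape.as_list()[1:])    # [512] -> Data(shape=(512,), batch_dim_axis=0)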
Example #2
@classmethod
def get_out_data_from_opts(cls, name, sources, pool_size, n_out=None, **kwargs):
    input_data = get_concat_sources_data_template(sources)
    assert not input_data.sparse
    # Static (batch, time, feature) shape of the batch-major placeholder.
    static_shape = input_data.get_placeholder_as_batch_major().shape
    seq_lens = input_data.size_placeholder[input_data.time_dim_axis_excluding_batch]
    return Data(
        name="%s_output" % name,
        shape=[static_shape[1].value, static_shape[2].value],
        dtype=input_data.dtype,
        # Keep every pool_size-th entry of the lengths vector,
        # i.e. stride over the batch entries, not over time.
        size_placeholder={
            0: tf.strided_slice(seq_lens, [0], tf.shape(seq_lens), [pool_size])},
        sparse=False,
        batch_dim_axis=0,
        time_dim_axis=1)
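In numpy terms, the size_placeholder expression above strides over the batch entries of the lengths vector, keeping every pool_size-th one; the lengths here are made up:

import numpy as np

seq_lens = np.array([7, 7, 5, 5, 3, 3])
pool_size = 2
print(seq_lens[0:len(seq_lens):pool_size])  # [7 5 3] -- the batch shrinks by pool_size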
Example #3
@classmethod
def get_out_data_from_opts(cls, name, sources, nr_of_channels, n_out=None, **kwargs):
    input_data = get_concat_sources_data_template(sources)
    assert not input_data.sparse
    # Static (batch, time, feature) shape of the batch-major placeholder.
    static_shape = input_data.get_placeholder_as_batch_major().shape
    seq_lens = input_data.size_placeholder[input_data.time_dim_axis_excluding_batch]
    return Data(
        name="%s_output" % name,
        # The feature dim is split into nr_of_channels parts.
        shape=[static_shape[1].value, static_shape[2].value // nr_of_channels],
        dtype=input_data.dtype,
        # Repeat each length nr_of_channels times, matching a batch dim
        # that grows by that factor.
        size_placeholder={
            0: tf.reshape(
                tf.tile(tf.reshape(seq_lens, [-1, 1]), [1, nr_of_channels]), [-1])},
        sparse=False,
        batch_dim_axis=0,
        time_dim_axis=1)
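The reshape/tile/reshape chain repeats each length nr_of_channels times, matching a batch dim that grows by that factor; a numpy sketch with made-up lengths:

import numpy as np

seq_lens = np.array([7, 5, 3])
nr_of_channels = 2
expanded = np.reshape(np.tile(np.reshape(seq_lens, (-1, 1)), (1, nr_of_channels)), -1)
print(expanded)  # [7 7 5 5 3 3]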
Example #4
 def __init__(self, name, network, n_out=None, out_type=None, sources=(),
              target=None, loss=None, loss_opts=None, L2=None, is_output_layer=None,
              trainable=True):
   """
   :param str name:
   :param TFNetwork.TFNetwork network:
   :param None|int n_out: output dim
   :param dict[str]|None out_type: kwargs for the Data class; more explicit than n_out.
   :param list[LayerBase] sources:
   :param str|None target: if some loss is set, this is the target data-key, i.e. network.extern_data.get_data(target)
     alternatively, this also can be a layer name.
   :param str|None loss: if set, via get_loss
   :param dict[str]|None loss_opts: kwargs for Loss class, if loss is set
   :param float|None L2: for constraints
   :param bool|None is_output_layer:
   :param bool trainable: mostly ignored for now...
   """
   self.name = name
   self.network = network
   if loss and not target:
     target = self.network.extern_data.default_target
   self.target = target
   self.loss = None  # type: Loss|None
   if loss:
     loss_class = get_loss_class(loss)
     self.loss = loss_class(**(loss_opts or {}))
     if self.loss.recurrent:
       self.recurrent = True
   if out_type is None and n_out is None and target:
     n_out = self._get_target_value(mark_data_key_as_used=False).dim
     if self.loss:
       n_out = self.loss.get_auto_output_layer_dim(n_out)
   if out_type is None:
     assert n_out
     out_type = {"dim": n_out}
   out_type = out_type.copy()
   out_type.setdefault("name", "%s_output" % self.name)
   if n_out is not None:
     out_type.setdefault("dim", n_out)
     assert out_type["dim"] == n_out
   # You are supposed to set self.output.{batch_dim_axis,time_dim_axis} explicitly,
   # and to check that the inputs are laid out the way you expect.
   # However, a good default is often to use the same as the input.
   if sources and "batch_dim_axis" not in out_type:
     out_type.setdefault("batch_dim_axis", sources[0].output.batch_dim_axis)
     out_type.setdefault("time_dim_axis", sources[0].output.time_dim_axis)
   self.output = Data(**out_type)
   # You are supposed to set self.output.placeholder to the value which you want to return by the layer.
   # Normally you are also supposed to set self.output.size_placeholder explicitly, just like self.output.placeholder.
   # However, in many cases, this will just be {0: time-lengths} and the same as from the input.
   # We check for this case and preset it accordingly where possible.
   # If you want to have it different in your layer, just overwrite it.
   if sources and sources[0].output.matches_dim_pattern(self.output):
     self.output.size_placeholder = sources[0].output.size_placeholder.copy()
   self.output_before_activation = None  # type: None|OutputWithActivation
   self.sources = sources
   self.params = {}  # type: dict[str,tf.Variable]
   self.L2 = L2
   self._is_output_layer = is_output_layer
   self.trainable = trainable
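For context, these kwargs usually come from a network config dict; a minimal sketch with made-up layer names, dims and options:

network = {
    "hidden": {"class": "linear", "activation": "tanh", "n_out": 500},
    "output": {"class": "softmax", "from": ["hidden"],
               "target": "classes", "loss": "ce", "L2": 0.01},
}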
Example #5
 def __init__(self, axis=None, axis_kind=None,
              slice_start=None, slice_end=None, slice_step=None,
              **kwargs):
   """
   :param int|None axis:
   :param str|None axis_kind: "T" for time, "B" for batch, "F" for feature
   :param int|None slice_start:
   :param int|None slice_end:
   :param int|None slice_step:
   :param int|None n_out: passed on via **kwargs to the base class
   """
   # Dummy out_type for now, will reset layer.
   super(SliceLayer, self).__init__(out_type={"shape": ()}, **kwargs)
   if axis is not None:
     assert not axis_kind
     assert 0 <= axis < len(self.input_data.batch_shape)
   else:
     assert axis_kind
     axis_kind = axis_kind.upper()
     if axis_kind == "T":
       assert self.input_data.time_dim_axis is not None
       axis = self.input_data.time_dim_axis
     elif axis_kind == "B":
       assert self.input_data.batch_dim_axis is not None
       axis = self.input_data.batch_dim_axis
     elif axis_kind == "F":
       axes = self.input_data.get_axes(exclude_time=True, exclude_batch=True)
       assert len(axes) == 1
       axis = axes[0]
   dim_slice = slice(slice_start, slice_end, slice_step)
   slices = [slice(None, None)] * axis + [dim_slice]
   out_type = self.input_data.get_kwargs()
   axis_wo_batch = self.input_data.get_batch_axis_excluding_batch(axis)
   if axis_wo_batch is not None:
     out_type["shape"] = list(out_type["shape"])
     if out_type["shape"][axis_wo_batch] is not None:
       out_type["shape"][axis_wo_batch] = len(range(out_type["shape"][axis_wo_batch])[dim_slice])
     if axis_wo_batch == len(out_type["shape"]) - 1 and not out_type["sparse"]:
       out_type["dim"] = out_type["shape"][axis_wo_batch]
   self.output = Data(**out_type)
   self.output.size_placeholder = self.input_data.size_placeholder.copy()  # copy, since we modify it below
   if axis == self.input_data.time_dim_axis:
     if slice_start:
       assert slice_start > 0
       self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] = \
         tf.maximum(0, self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] - slice_start)
     if slice_end:
       assert slice_end > 0
       self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] = \
         tf.minimum(
           tf.shape(self.input_data.placeholder)[self.input_data.time_dim_axis] - slice_end,
           self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch])
      if slice_step:
        # A slice with step s keeps ceil(len / s) frames, not floor(len / s).
        self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] = \
          -(-self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] // slice_step)
   elif axis_wo_batch is not None:
     assert axis_wo_batch not in self.output.size_placeholder
   self.output.placeholder = self.input_data.placeholder[slices]
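The static-shape update above reuses Python slice semantics on a range(); the same trick shows what each option does to a length of 10, including that a step keeps ceil(len / step) entries, matching the length update above:

n = 10
print(len(range(n)[slice(2, None, None)]))   # slice_start=2 -> 8
print(len(range(n)[slice(None, 8, None)]))   # slice_end=8   -> 8
print(len(range(n)[slice(None, None, 3)]))   # slice_step=3  -> 4, i.e. ceil(10 / 3)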
Example #6
def targetb_linear_out(sources, **kwargs):
    from TFUtil import Data
    enc = sources[1].output
    dec = sources[0].output
    size = enc.get_sequence_lengths()  # + dec.get_sequence_lengths()
    # output_len_tag.set_tag_on_size_tensor(size)
    return Data(name="targetb_linear",
                sparse=True,
                # eval() defers the lookup of targetb_num_labels to the config's global namespace
                dim=eval("targetb_num_labels"),
                size_placeholder={0: size})
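A hypothetical way such a function gets used: as the output template of a layer dict in the config (the layer options here are made up):

layer_dict = {"class": "eval", "from": ["lm", "encoder"],
              "eval": "...", "out_type": targetb_linear_out}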
Example #7
@classmethod
def get_out_data_from_opts(cls, name, sources, nr_of_channels, n_out=None, **kwargs):
    input_data = get_concat_sources_data_template(sources).copy_as_batch_major()
    assert not input_data.sparse
    # batch_shape is the static (batch, time, feature) shape, with None for
    # dynamic axes; the feature dim is split into nr_of_channels parts.
    return Data(name="%s_output" % name,
                shape=[input_data.batch_shape[1],
                       input_data.batch_shape[2] // nr_of_channels],
                dtype=input_data.dtype,
                sparse=False,
                batch_dim_axis=0,
                time_dim_axis=1)
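Here batch_shape plays the role of placeholder.shape[i].value in the earlier examples: a static (batch, time, feature) tuple with None for dynamic axes. A tiny sketch with a made-up input shape:

batch_shape = (None, None, 40)
nr_of_channels = 2
print(batch_shape[2] // nr_of_channels)  # 20 -> the new static feature dim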
Example #8
def concat_sources(src_layers):
  """
  :param list[LayerBase] src_layers:
  :return: data with placeholders set
  :rtype: Data
  """
  assert src_layers, "need source layers"
  if len(src_layers) == 1:
    return src_layers[0].output
  assert not src_layers[0].output.sparse, "sparse concat not supported"
  shape = src_layers[0].output.shape  # without batch-dim
  assert shape, "source of layer %r must not be a scalar" % src_layers[0]
  prefix_shape = shape[:-1]
  dim = 0
  dtype = src_layers[0].output.dtype
  batch_dim_axis = src_layers[0].output.batch_dim_axis
  time_dim_axis = src_layers[0].output.time_dim_axis
  for layer in src_layers:
    assert layer.output.dtype == dtype, "incompatible dtype with layer %r" % layer
    assert layer.output.batch_dim_axis == batch_dim_axis
    assert layer.output.time_dim_axis == time_dim_axis
    shape = layer.output.shape
    assert layer.output.placeholder.get_shape().ndims == len(shape) + 1  # with batch-dim
    assert shape, "source of layer %r must not be a scalar" % layer
    assert shape[:-1] == prefix_shape, "incompatible concat with layer %r" % layer
    assert shape[-1], "last dim of source of layer %r must be specified" % layer
    dim += shape[-1]
  data = Data(
    name="concat_sources",
    shape=prefix_shape + (dim,),
    dim=dim,
    sparse=False,
    batch_dim_axis=batch_dim_axis,
    time_dim_axis=time_dim_axis,
    dtype=dtype)
  data.placeholder = tf.concat(
    axis=len(prefix_shape) + 1,  # one more because this is with the batch dim
    values=[layer.output.placeholder for layer in src_layers])
  data.size_placeholder = src_layers[0].output.size_placeholder.copy()
  return data
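The concat axis is the feature axis counted with the batch dim included; a standalone sketch with made-up shapes, assuming TF1 like the rest of this page:

import tensorflow as tf

a = tf.placeholder(tf.float32, (None, None, 3))  # (batch, time, 3)
b = tf.placeholder(tf.float32, (None, None, 5))  # (batch, time, 5)
c = tf.concat(values=[a, b], axis=2)             # prefix_shape == (None,), so axis = 1 + 1
print(c.shape.as_list())                         # [None, None, 8]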
Example #9
@classmethod
def get_out_data_from_opts(cls, name, sources, repetitions, n_out=None, **kwargs):
    input_data = get_concat_sources_data_template(sources)
    assert not input_data.sparse
    # Static (batch, time, feature) shape of the batch-major placeholder.
    static_shape = input_data.get_placeholder_as_batch_major().shape
    return Data(
        name="%s_output" % name,
        # The feature dim grows by the repetition factor; time is unchanged.
        shape=[static_shape[1].value, static_shape[2].value * repetitions],
        dtype=input_data.dtype,
        sparse=False,
        # The lengths pass through unchanged, since only the feature dim grows.
        size_placeholder={
            0: input_data.size_placeholder[input_data.time_dim_axis_excluding_batch]},
        batch_dim_axis=0,
        time_dim_axis=1)
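Repeating along the feature axis leaves the (batch, time) layout untouched, which is why the input's lengths can be reused as-is; a numpy sketch with made-up shapes:

import numpy as np

x = np.zeros((2, 5, 4))    # (batch, time, feature)
y = np.tile(x, (1, 1, 3))  # repetitions = 3
print(y.shape)             # (2, 5, 12) -- time axis unchanged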
Example #10
 def get_out_data_from_opts(cls, name, **kwargs):
     from TFUtil import Data
     return Data(name="%s_output" % name,
                 batch_dim_axis=None,
                 shape=(),
                 dtype="float32")  # scalar