Example #1
  def create_state_var(self, name, initial_value=None, data_shape=None, choice_dependent=None):
    """
    A state var is a variable where the initial value is given by the encoder, or a constant,
    and the final value is determined by one step of this rec layer (usually called the decoder).

    :param str name:
    :param tf.Tensor|None initial_value: assumes batch-major, if data_shape is not given
    :param Data|None data_shape:
    :param bool|None choice_dependent: whether the state var is choice dependent; None if not relevant
    :rtype: tf.Tensor
    """
    assert name not in self.state_vars
    assert data_shape or initial_value is not None
    if data_shape:
      assert isinstance(data_shape, Data)
    elif initial_value.shape.ndims == 0:
      data_shape = Data(name=name, batch_dim_axis=None, shape=(), dtype=initial_value.dtype.name)
    else:
      assert initial_value.shape.dims[0].value is None  # first is batch dim
      data_shape = Data(
        name=name, batch_dim_axis=0, shape=initial_value.shape.as_list()[1:], dtype=initial_value.dtype.name)
    if initial_value is not None:
      # initial_value might have dim 1 in variable dimensions (which are not the batch-dim-axis),
      # see get_rec_initial_output, which should be fine for broadcasting.
      initial_value.set_shape(data_shape.batch_shape)
    var = self.StateVar(parent=self, name=name, initial_value=initial_value, data_shape=data_shape, choice_dependent=choice_dependent)
    self.state_vars[name] = var
    return var.read()
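
A quick standalone illustration (NumPy, made-up sizes, not part of the original code) of the broadcasting note in the comment above: an initial value with size 1 in a variable, non-batch dimension still broadcasts against the full per-step shape.

import numpy as np

batch, dim = 3, 5
initial_value = np.zeros((batch, 1))   # dim 1 in a variable (non-batch) axis
step_update = np.ones((batch, dim))    # what one decoder step might produce
state = initial_value + step_update    # broadcasts to (batch, dim)
assert state.shape == (batch, dim)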
Example #2
 def __init__(self, name, network, n_out=None, out_type=None, sources=(),
              target=None, loss=None, loss_opts=None, L2=None, is_output_layer=None,
              trainable=True):
   """
   :param str name:
   :param TFNetwork.TFNetwork network:
   :param None|int n_out: output dim
   :param dict[str] out_type: kwargs for Data class. more explicit than n_out.
   :param list[LayerBase] sources:
   :param str|None target: if some loss is set, this is the target data-key, i.e. network.extern_data.get_data(target)
     alternatively, this can also be a layer name.
   :param str|None loss: if set, via get_loss
   :param dict[str]|None loss_opts: kwargs for Loss class, if loss is set
   :param float|None L2: for constraints
   :param bool|None is_output_layer:
   :param bool trainable: mostly ignored for now...
   """
   self.name = name
   self.network = network
   if loss and not target:
     target = self.network.extern_data.default_target
   self.target = target
   self.loss = None  # type: Loss
   if loss:
     loss_class = get_loss_class(loss)
     self.loss = loss_class(**(loss_opts or {}))
     if self.loss.recurrent:
       self.recurrent = True
   if out_type is None and n_out is None and target:
     n_out = self._get_target_value(mark_data_key_as_used=False).dim
     if self.loss:
       n_out = self.loss.get_auto_output_layer_dim(n_out)
   if out_type is None:
     assert n_out
     out_type = {"dim": n_out}
   out_type = out_type.copy()
   out_type.setdefault("name", "%s_output" % self.name)
   if n_out is not None:
     out_type.setdefault("dim", n_out)
     assert out_type["dim"] == n_out
   # You are supposed to set self.output.{batch_dim_axis,time_dim_axis} explicitly,
   # as well as check whether the inputs are as you expect.
   # However, a good default is often to use the same as the input.
   if sources and "batch_dim_axis" not in out_type:
     out_type.setdefault("batch_dim_axis", sources[0].output.batch_dim_axis)
     out_type.setdefault("time_dim_axis", sources[0].output.time_dim_axis)
   self.output = Data(**out_type)
   # You are supposed to set self.output.placeholder to the value which you want to return by the layer.
   # Normally you are also supposed to set self.output.size_placeholder explicitly, just like self.output.placeholder.
   # However, in many cases, this will just be {0: time-lengths} and the same as from the input.
   # We check for this case and, if possible, preset it from the input.
   # If you want to have it different in your layer, just overwrite it.
   if sources and sources[0].output.matches_dim_pattern(self.output):
     self.output.size_placeholder = sources[0].output.size_placeholder.copy()
   self.output_before_activation = None  # type: None|OutputWithActivation
   self.sources = sources
   self.params = {}  # type: dict[str,tf.Variable]
   self.L2 = L2
   self._is_output_layer = is_output_layer
   self.trainable = trainable
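
To make the n_out / out_type handling above concrete, here is a small sketch of the same dict logic with made-up values (plain Python, not RETURNN code): n_out simply becomes the "dim" entry, and an explicit out_type must agree with it.

n_out = 512
out_type = {"dim": n_out}                              # built when out_type is None
out_type = out_type.copy()
out_type.setdefault("name", "%s_output" % "my_layer")  # hypothetical layer name
out_type.setdefault("dim", n_out)
assert out_type["dim"] == n_out
print(out_type)  # {'dim': 512, 'name': 'my_layer_output'}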
Example #3
 def get_out_data_from_opts(cls,
                            name,
                            sources,
                            pool_size,
                            n_out=None,
                            **kwargs):
     input_data = get_concat_sources_data_template(sources)
     assert not input_data.sparse
     return Data(
         name="%s_output" % name,
         shape=[
             input_data.get_placeholder_as_batch_major().shape[1].value,
             input_data.get_placeholder_as_batch_major().shape[2].value
         ],
         dtype=input_data.dtype,
         size_placeholder={
             0:
             tf.strided_slice(
                 input_data.size_placeholder[
                     input_data.time_dim_axis_excluding_batch], [0],
                 tf.shape(input_data.size_placeholder[
                     input_data.time_dim_axis_excluding_batch]),
                 [pool_size])
         },
         sparse=False,
         batch_dim_axis=0,
         time_dim_axis=1)
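
For reference, the size_placeholder expression above, tf.strided_slice(x, [0], tf.shape(x), [pool_size]), selects every pool_size-th entry of the length vector, i.e. x[::pool_size]. A NumPy sketch with made-up lengths:

import numpy as np

seq_lens = np.array([7, 5, 9, 4])   # one length per batch entry
pool_size = 2
print(seq_lens[::pool_size])        # [7 9]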
Example #4
 def get_out_data_from_opts(cls,
                            name,
                            sources,
                            nr_of_channels,
                            n_out=None,
                            **kwargs):
     input_data = get_concat_sources_data_template(sources)
     assert not input_data.sparse
     return Data(
         name="%s_output" % name,
         shape=[
             input_data.get_placeholder_as_batch_major().shape[1].value,
             input_data.get_placeholder_as_batch_major().shape[2].value //
             nr_of_channels
         ],
         dtype=input_data.dtype,
         size_placeholder={
             0:
             tf.reshape(
                 tf.tile(
                     tf.reshape(
                         input_data.size_placeholder[
                             input_data.time_dim_axis_excluding_batch],
                         [-1, 1]), [1, nr_of_channels]), [-1])
         },
         sparse=False,
         batch_dim_axis=0,
         time_dim_axis=1)
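
The nested reshape/tile/reshape on the size_placeholder above repeats each sequence length nr_of_channels times. The same arithmetic as a NumPy sketch with made-up lengths:

import numpy as np

seq_lens = np.array([7, 5, 9])
nr_of_channels = 2
tiled = np.reshape(np.tile(np.reshape(seq_lens, [-1, 1]), [1, nr_of_channels]), [-1])
print(tiled)  # [7 7 5 5 9 9]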
Example #5
 def __init__(self, axis=None, axis_kind=None,
              slice_start=None, slice_end=None, slice_step=None,
              **kwargs):
   """
   :param int|None axis:
   :param str|None axis_kind: "T" for time, "B" for batch, "F" for feature
   :param int|None slice_start:
   :param int|None slice_end:
   :param int|None slice_step:
   :param int|None n_out:
   """
   # Dummy out_type for now, will reset layer.
   super(SliceLayer, self).__init__(out_type={"shape": ()}, **kwargs)
   if axis is not None:
     assert not axis_kind
     assert 0 <= axis < len(self.input_data.batch_shape)
   else:
     assert axis_kind
     axis_kind = axis_kind.upper()
     if axis_kind == "T":
       assert self.input_data.time_dim_axis is not None
       axis = self.input_data.time_dim_axis
     elif axis_kind == "B":
       assert self.input_data.batch_dim_axis is not None
       axis = self.input_data.batch_dim_axis
     elif axis_kind == "F":
       axes = self.input_data.get_axes(exclude_time=True, exclude_batch=True)
       assert len(axes) == 1
       axis = axes[0]
   dim_slice = slice(slice_start, slice_end, slice_step)
   slices = [slice(None, None)] * axis + [dim_slice]
   out_type = self.input_data.get_kwargs()
   axis_wo_batch = self.input_data.get_batch_axis_excluding_batch(axis)
   if axis_wo_batch is not None:
     out_type["shape"] = list(out_type["shape"])
     if out_type["shape"][axis_wo_batch] is not None:
       out_type["shape"][axis_wo_batch] = len(range(out_type["shape"][axis_wo_batch])[dim_slice])
     if axis_wo_batch == len(out_type["shape"]) - 1 and not out_type["sparse"]:
       out_type["dim"] = out_type["shape"][axis_wo_batch]
   self.output = Data(**out_type)
   self.output.size_placeholder = self.input_data.size_placeholder.copy()  # copy, so the updates below do not modify the input's dict
   if axis == self.input_data.time_dim_axis:
     if slice_start:
       assert slice_start > 0
       self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] = \
         tf.maximum(0, self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] - slice_start)
     if slice_end:
       assert slice_end > 0
       self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] = \
         tf.minimum(
           tf.shape(self.input_data.placeholder)[self.input_data.time_dim_axis] - slice_end,
           self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch])
     if slice_step:
       self.output.size_placeholder[self.input_data.time_dim_axis_excluding_batch] //= slice_step
   elif axis_wo_batch is not None:
     assert axis_wo_batch not in self.output.size_placeholder
   self.output.placeholder = self.input_data.placeholder[slices]
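
The static output dim above is derived purely from Python slice semantics: len(range(n)[s]) is the number of entries a slice s selects from an axis of known size n. A standalone sketch:

n = 10
for s in (slice(2, None), slice(None, -1), slice(None, None, 3)):
    print(s, "->", len(range(n)[s]))
# slice(2, None, None) -> 8
# slice(None, -1, None) -> 9
# slice(None, None, 3) -> 4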
Example #6
def targetb_linear_out(sources, **kwargs):
    from TFUtil import Data
    enc = sources[1].output
    dec = sources[0].output
    size = enc.get_sequence_lengths()  # + dec.get_sequence_lengths()
    # output_len_tag.set_tag_on_size_tensor(size)
    return Data(name="targetb_linear",
                sparse=True,
                dim=eval("targetb_num_labels"),
                size_placeholder={0: size})
Example #7
def concat_sources(src_layers):
  """
  :param list[LayerBase] src_layers:
  :return: data with placeholders set
  :rtype: Data
  """
  assert src_layers, "need source layers"
  if len(src_layers) == 1:
    return src_layers[0].output
  assert not src_layers[0].output.sparse, "sparse concat not supported"
  shape = src_layers[0].output.shape  # without batch-dim
  assert shape, "source must not be a scalar of layer %r" % src_layers[0]
  prefix_shape = shape[:-1]
  dim = 0
  dtype = src_layers[0].output.dtype
  batch_dim_axis = src_layers[0].output.batch_dim_axis
  time_dim_axis = src_layers[0].output.time_dim_axis
  for layer in src_layers:
    assert layer.output.dtype == dtype, "incompatible dtype with layer %r" % layer
    assert layer.output.batch_dim_axis == batch_dim_axis
    assert layer.output.time_dim_axis == time_dim_axis
    shape = layer.output.shape
    assert layer.output.placeholder.get_shape().ndims == len(shape) + 1  # with batch-dim
    assert shape, "source must not be a scalar of layer %r" % layer
    assert shape[:-1] == prefix_shape, "incompatible concat with layer %r" % layer
    assert shape[-1], "source last-dim must be specified of layer %r" % layer
    dim += shape[-1]
  data = Data(
    name="concat_sources",
    shape=prefix_shape + (dim,),
    dim=dim,
    sparse=False,
    batch_dim_axis=batch_dim_axis,
    time_dim_axis=time_dim_axis,
    dtype=dtype)
  data.placeholder = tf.concat(
    concat_dim=len(prefix_shape) + 1,  # one more because this is with batch-dim
    values=[layer.output.placeholder for layer in src_layers])
  data.size_placeholder = src_layers[0].output.size_placeholder.copy()
  return data
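
As a standalone sketch of what concat_sources computes (NumPy, made-up shapes, not part of the original code): all sources must agree on every axis except the last (feature) axis, and the output feature dim is the sum of the source dims.

import numpy as np

a = np.random.rand(2, 7, 4)            # (batch, time, dim_a)
b = np.random.rand(2, 7, 6)            # (batch, time, dim_b)
out = np.concatenate([a, b], axis=-1)  # concat along the feature axis
assert out.shape == (2, 7, 10)         # dim = 4 + 6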
Example #8
 def get_out_data_from_opts(cls,
                            name,
                            sources,
                            nr_of_channels,
                            n_out=None,
                            **kwargs):
     input_data = get_concat_sources_data_template(
         sources).copy_as_batch_major()
     assert not input_data.sparse
     return Data(name="%s_output" % name,
                 shape=[
                     input_data.batch_shape[1],
                     input_data.batch_shape[2] // nr_of_channels
                 ],
                 dtype=input_data.dtype,
                 sparse=False,
                 batch_dim_axis=0,
                 time_dim_axis=1)
Example #9
 def get_out_data_from_opts(cls,
                            n_out=NotSpecified,
                            out_type=None,
                            sources=(),
                            **kwargs):
     """
     :param int|None|NotSpecified n_out:
     :param dict[str]|None out_type:
     :param list[LayerBase] sources:
     :rtype: Data
     """
     out_type_ = {}
     if sources and any(sources):
         out_type_.update(
             Data.get_common_data([s.output for s in sources
                                   if s]).get_kwargs())
     if n_out is not NotSpecified:
         out_type_["dim"] = n_out
     out_type_["name"] = "%s_output" % kwargs["name"]
     if out_type:
         if isinstance(out_type, dict):
             out_type_.update(out_type)
         elif callable(out_type):
             out_type_ = out_type  # just overwrite
         else:
             raise TypeError("unexpected type of out_type %r" %
                             (out_type, ))
     ######## ADDED ############
     #out_type_["batch_dim_axis"] = 0
     #out_type_["feature_dim_axis"] = 1
     #out_type_["time_dim_axis"] = 2
     ###########################
     return super(GatedRecurrentContextLayer,
                  cls).get_out_data_from_opts(n_out=n_out,
                                              out_type=out_type_,
                                              sources=sources,
                                              **kwargs)
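
The override order above, sketched with plain dicts and made-up values: kwargs from the common source data come first, n_out overrides the inferred dim, and an explicit out_type dict wins last.

out_type_ = {"dim": 128, "shape": (None, 128)}   # from the common source data
n_out = 64
out_type_["dim"] = n_out                         # n_out overrides the inferred dim
out_type_.update({"dim": 32, "sparse": False})   # an explicit out_type wins last
print(out_type_)  # {'dim': 32, 'shape': (None, 128), 'sparse': False}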
Example #10
 def get_out_data_from_opts(cls,
                            name,
                            sources,
                            repetitions,
                            n_out=None,
                            **kwargs):
     input_data = get_concat_sources_data_template(sources)
     assert not input_data.sparse
     return Data(
         name="%s_output" % name,
         shape=[
             input_data.get_placeholder_as_batch_major().shape[1].value,
             input_data.get_placeholder_as_batch_major().shape[2].value *
             repetitions
         ],
         dtype=input_data.dtype,
         sparse=False,
         size_placeholder={
             0:
             input_data.size_placeholder[
                 input_data.time_dim_axis_excluding_batch]
         },
         batch_dim_axis=0,
         time_dim_axis=1)
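
The shape bookkeeping above multiplies the feature dim by repetitions. Assuming the layer repeats along the feature axis (the actual op is not shown in this snippet), the NumPy equivalent would be:

import numpy as np

x = np.random.rand(2, 7, 4)            # (batch, time, dim)
repetitions = 3
y = np.tile(x, [1, 1, repetitions])    # (batch, time, dim * repetitions)
assert y.shape == (2, 7, 12)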
Example #11
 def get_out_data_from_opts(cls, name, **kwargs):
     from TFUtil import Data
     return Data(name="%s_output" % name,
                 batch_dim_axis=None,
                 shape=(),
                 dtype="float32")  # scalar
Example #12
class LayerBase(object):
  layer_class = None
  recurrent = False

  def __init__(self, name, network, n_out=None, out_type=None, sources=(),
               target=None, loss=None, loss_opts=None, L2=None, is_output_layer=None,
               trainable=True):
    """
    :param str name:
    :param TFNetwork.TFNetwork network:
    :param None|int n_out: output dim
    :param dict[str] out_type: kwargs for Data class. more explicit than n_out.
    :param list[LayerBase] sources:
    :param str|None target: if some loss is set, this is the target data-key, i.e. network.extern_data.get_data(target)
      alternatively, this can also be a layer name.
    :param str|None loss: if set, via get_loss
    :param dict[str]|None loss_opts: kwargs for Loss class, if loss is set
    :param float|None L2: for constraints
    :param bool|None is_output_layer:
    :param bool trainable: mostly ignored for now...
    """
    self.name = name
    self.network = network
    if loss and not target:
      target = self.network.extern_data.default_target
    self.target = target
    self.loss = None  # type: Loss
    if loss:
      loss_class = get_loss_class(loss)
      self.loss = loss_class(**(loss_opts or {}))
      if self.loss.recurrent:
        self.recurrent = True
    if out_type is None and n_out is None and target:
      n_out = self._get_target_value(mark_data_key_as_used=False).dim
      if self.loss:
        n_out = self.loss.get_auto_output_layer_dim(n_out)
    if out_type is None:
      assert n_out
      out_type = {"dim": n_out}
    out_type = out_type.copy()
    out_type.setdefault("name", "%s_output" % self.name)
    if n_out is not None:
      out_type.setdefault("dim", n_out)
      assert out_type["dim"] == n_out
    # You are supposed to set self.output.{batch_dim_axis,time_dim_axis} explicitly,
    # as well as check whether the inputs are as you expect.
    # However, a good default is often to use the same as the input.
    if sources and "batch_dim_axis" not in out_type:
      out_type.setdefault("batch_dim_axis", sources[0].output.batch_dim_axis)
      out_type.setdefault("time_dim_axis", sources[0].output.time_dim_axis)
    self.output = Data(**out_type)
    # You are supposed to set self.output.placeholder to the value which you want to return by the layer.
    # Normally you are also supposed to set self.output.size_placeholder explicitly, just like self.output.placeholder.
    # However, in many cases, this will just be {0: time-lengths} and the same as from the input.
    # We check for this case and, if possible, preset it from the input.
    # If you want to have it different in your layer, just overwrite it.
    if sources and sources[0].output.matches_dim_pattern(self.output):
      self.output.size_placeholder = sources[0].output.size_placeholder.copy()
    self.output_before_activation = None  # type: None|OutputWithActivation
    self.sources = sources
    self.params = {}  # type: dict[str,tf.Variable]
    self.L2 = L2
    self._is_output_layer = is_output_layer
    self.trainable = trainable

  def __repr__(self):
    return "%s{class=%s, out_type=%s}" % (
      self.name, self.layer_class, self.output.get_description(with_name=False))

  @classmethod
  def cls_get_tf_scope_name(cls, name):
    """
    :param str name: layer name
    :return: scope name, might be just name
    """
    return name.replace(":", "__")

  @property
  def tf_scope_name(self):
    return self.cls_get_tf_scope_name(name=self.name)

  def is_output_layer(self):
    """
    Some code differs between an output layer and other layers.
    It is a bit arbitrary what we define as output layer.
    :rtype: bool
    """
    if self._is_output_layer is not None:
      return self._is_output_layer
    if self.target:
      return True
    if self.name == "output":
      return True
    return False

  def add_param(self, param):
    """
    :param tf.Variable param:
    :return: param
    :rtype: tf.Variable
    """
    assert param.name
    self.params[param.name] = param
    return param

  def set_param_values_by_dict(self, values_dict, session):
    """
    :param dict[str,numpy.ndarray] values_dict:
    :param tf.Session session:
    """
    for param_name, values in values_dict.items():
      param = self.params[param_name]
      assert isinstance(param, tf.Variable)
      shape = param.get_shape()
      assert isinstance(shape, tf.TensorShape)
      assert shape.is_fully_defined()
      assert tuple(shape.as_list()) == values.shape
      self.network.get_var_assigner(param).assign(values, session=session)

  def get_param_values_dict(self, session):
    """
    :param tf.Session session:
    :return: dict name -> values
    :rtype: dict[str,numpy.ndarray]
    """
    d = {}
    for param_name, param in self.params.items():
      d[param_name] = param.eval(session)
    return d

  def _get_target_value(self, mark_data_key_as_used=True):
    """
    :param bool mark_data_key_as_used: forwarded to self.network.get_extern_data()
    :rtype: Data | None
    """
    if not self.target or self.target == "none":
      return None
    if self.network.extern_data.has_data(self.target):
      return self.network.get_extern_data(self.target, mark_data_key_as_used=mark_data_key_as_used)
    if self.target in self.network.layers:
      return self.network.layers[self.target].output
    raise Exception("target %r unknown" % self.target)

  def _init_loss(self):
    if self.loss.output is self.output:
      return
    self.loss.init(
      output=self.output,
      output_with_activation=self.output_before_activation,
      target=self._get_target_value())

  def get_loss_value(self):
    """
    :return: the loss, a scalar value, or None if not set
    :rtype: tf.Tensor | None
    """
    if not self.loss:
      return None
    self._init_loss()
    with tf.name_scope("loss"):
      return self.loss.get_value()

  def get_error_value(self):
    """
    :return: usually the frame error rate, or None if not defined
    :rtype: tf.Tensor | None
    """
    if not self.loss:
      return None
    self._init_loss()
    with tf.name_scope("error"):
      return self.loss.get_error()

  def get_params_l2_norm(self):
    return 2 * sum([tf.nn.l2_loss(param) for (name, param) in sorted(self.params.items())])

  def get_constraints_value(self):
    c = 0
    if self.L2:
      c += self.L2 * self.get_params_l2_norm()
    if c == 0:
      return None
    return c
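
A note on get_params_l2_norm above: tf.nn.l2_loss(x) computes sum(x**2) / 2, so the factor 2 turns the result into the plain sum of squares over all parameters. The same arithmetic as a NumPy sketch with made-up values:

import numpy as np

params = [np.array([1.0, 2.0]), np.array([3.0])]
l2_loss = lambda x: np.sum(x ** 2) / 2.0    # what tf.nn.l2_loss computes
total = 2 * sum(l2_loss(p) for p in params)
assert total == 1.0 + 4.0 + 9.0             # plain sum of squares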