Example #1
import theano.tensor as T  # needed for T.nnet.logsoftmax / T.reshape below

def log_softmax(z):
  assert z.ndim >= 1
  if z.ndim <= 2:
    return T.nnet.logsoftmax(z)
  else:
    from returnn.theano.util import time_batch_make_flat
    z_flat = time_batch_make_flat(z)
    assert z_flat.ndim == 2
    return T.reshape(T.nnet.logsoftmax(z_flat), z.shape)
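As a quick sanity check (not part of RETURNN), the same computation can be reproduced with plain NumPy. This is a minimal sketch, assuming only NumPy, that mirrors what T.nnet.logsoftmax computes on a 2D (time*batch, dim) matrix, using the usual max-shift for numerical stability:

import numpy as np

def log_softmax_np(z):
  # Stable log-softmax over the last axis:
  # log_softmax(z) = (z - max(z)) - log(sum(exp(z - max(z))))
  z_shift = z - z.max(axis=-1, keepdims=True)
  return z_shift - np.log(np.exp(z_shift).sum(axis=-1, keepdims=True))

z = np.random.randn(7, 5).astype("float32")  # e.g. (time * batch, dim)
assert np.allclose(np.exp(log_softmax_np(z)).sum(axis=-1), 1.0, atol=1e-5)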
Example #2
 def add_layer(self, layer):
   """
   :type layer: NetworkHiddenLayer.Layer
   :rtype: NetworkHiddenLayer.Layer
   """
   assert layer.name
   layer_errors = layer.errors()
   if isinstance(layer, OutputLayer) or layer.name == "output" or layer_errors is not None:
     is_output_layer = True
     self.output[layer.name] = layer
   else:
     is_output_layer = False
     self.hidden[layer.name] = layer
   if layer_errors is not None:
     self.errors[layer.name] = layer_errors
   if is_output_layer:
     if getattr(layer, "p_y_given_x", None) is None and layer.output:
       # Small hack for layers which we use as output layers but which don't set this.
       from returnn.theano.util import time_batch_make_flat
       layer.p_y_given_x = layer.output
       layer.p_y_given_x_flat = time_batch_make_flat(layer.output)
     self.declare_train_params()
   return layer
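The branching above reduces to a single rule: a layer is treated as an output layer if it is an OutputLayer instance, is literally named "output", or reports errors; everything else goes into the hidden dict. A minimal stand-alone sketch of that rule, using hypothetical stub classes rather than RETURNN's own:

class StubLayer:
  def __init__(self, name, errors=None):
    self.name = name
    self._errors = errors
  def errors(self):
    return self._errors

class StubOutputLayer(StubLayer):
  pass

def is_output_layer(layer):
  # Same condition as in add_layer() above.
  return isinstance(layer, StubOutputLayer) or layer.name == "output" or layer.errors() is not None

assert is_output_layer(StubOutputLayer("classes"))
assert is_output_layer(StubLayer("output"))
assert is_output_layer(StubLayer("aux", errors=0.5))
assert not is_output_layer(StubLayer("hidden1"))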
Example #3
 def __init__(self, sources, n_out, index, y_in=None, target=None, target_index=None,
              sparse=False, cost_scale=1.0, input_scale=1.0,
              L1=0.0, L2=0.0, L2_eye=None, varreg=0.0,
              output_L2_reg=0.0, output_entropy_reg=0.0, output_entropy_exp_reg=0.0,
              with_bias=True,
              mask="unity", dropout=0.0, batch_drop=False, batch_norm=False, bn_use_sample=False, layer_drop=0.0, residual=False,
              carry=False,
              sparse_filtering=False, gradient_scale=1.0, trainable=True, device=None,
              dtype='float32',
              **kwargs):
   """
   :param list[NetworkBaseLayer.Layer] sources: list of source layers
   :param int n_out: output dim of W_in and dim of bias
   :param float L1: l1-param-norm regularization
   :param float L2: l2-param-norm regularization
   :param str mask: "unity" or "dropout"
   :type dropout: float
   """
   super(Layer, self).__init__(**kwargs)
   self.index = index
   self.sources = sources; ":type: list[Layer]"
   self.num_sources = len(sources)
   self.D = max([s.D for s in sources if isinstance(s,Layer)] + [0])
   if mask is None: mask = 'none'
   self.set_attr('mask', mask)
   self.set_attr('dropout', dropout)
   self.set_attr('sparse', sparse)
   self.set_attr('bn_use_sample', bn_use_sample)
   self.set_attr('sparse_filtering', sparse_filtering)
   if not trainable:
     self.set_attr('trainable', trainable)  # only store if not default
     self.gradient_scale = 0.0  # just to be sure
   else:
     self.gradient_scale = gradient_scale
   if gradient_scale != 1.0:
     self.set_attr('gradient_scale', gradient_scale)
   self.set_attr('layer_drop', layer_drop)
   assert not carry, "not supported anymore"
   self.set_attr('residual', residual)
   self.set_attr('n_out', n_out)
   self.set_attr('L1', L1)
   self.set_attr('L2', L2)
   if L2_eye:
     self.set_attr('L2_eye', L2_eye)
   self.device = device # if device else str(theano.config.device)
   for s in self.sources:
     s.transfer_output(self.device)
   self.set_attr('varreg', varreg)
   if output_L2_reg:
     self.set_attr('output_L2_reg', output_L2_reg)
   if output_entropy_reg:
     self.set_attr('output_entropy_reg', output_entropy_reg)
   if output_entropy_exp_reg:
     self.set_attr('output_entropy_exp_reg', output_entropy_exp_reg)
   self.set_attr('batch_norm', batch_norm)
   self.set_attr('input_scale', input_scale)
   if y_in is not None:
     self.y_in = {}
     for k in y_in:
       if not isinstance(y_in[k], T.Variable): continue
       self.y_in[k] = time_batch_make_flat(y_in[k])  # TODO: better not flatten here...
       self.y_in[k].n_out = getattr(y_in[k], "n_out", None)
   else:
     self.y_in = None
   self.constraints = T.constant(0)
   if target:
     self.set_attr('target', target)
   if target_index:
     self.set_attr('target_index', target_index)
     assert target_index in self.network.j
     self.index = index = self.network.j[target_index]
   if cost_scale != 1:
     self.set_attr("cost_scale", cost_scale)
   if with_bias:
     self.b = self.add_param(self.create_bias(n_out), 'b_%s'%self.name)
   else:
     self.set_attr('with_bias', False)
     self.b = numpy.float32(0)
   self.mass = T.constant(1., name = "mass_%s" % self.name, dtype='float32')
   self.masks = [None] * len(self.sources)
   assert mask in ['dropout', 'unity', 'none'], "invalid mask: %s" % mask
   if mask == "dropout" or (mask == 'none' and dropout > 0):
     assert 0.0 < dropout < 1.0
     # If we apply this mass during training, then we don't need any mask or mass for testing.
     # The expectation should stay unchanged, i.e.
     #   E[mass * mask * x] = mass * (1 - dropout) * x = x,
     # so mass has to be 1 / (1 - dropout).
     self.mass = T.constant(1.0 / (1.0 - dropout), dtype='float32')
     from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
     srng = RandomStreams(self.rng.randint(1234) + 1)
     if self.depth > 1:
       self.masks = [T.cast(srng.binomial(n=1, p=1 - dropout, size=(s.attrs['n_out'],self.depth)), theano.config.floatX) for s in self.sources]
     else:
       if batch_drop:
         self.masks = [T.cast(srng.binomial(n=1, p=1 - dropout, size=s.output.shape), theano.config.floatX) for s in self.sources]
       else:
         self.masks = [T.cast(srng.binomial(n=1, p=1 - dropout, size=(s.attrs['n_out'],)), theano.config.floatX) for s in self.sources]
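The mass = 1 / (1 - dropout) constant set above is the usual inverted-dropout scaling: kept activations are scaled up during training so that their expectation matches the unscaled activations used at test time. A small NumPy check, purely illustrative and independent of the Theano code:

import numpy as np

rng = np.random.default_rng(0)
dropout = 0.3
mass = 1.0 / (1.0 - dropout)  # same factor as self.mass above
x = rng.standard_normal((10000, 64)).astype("float32")
mask = (rng.random(x.shape) >= dropout).astype("float32")  # keep with prob 1 - dropout
# E[mask] = 1 - dropout, so E[mass * mask * x] = x; the means should nearly agree.
print(abs((mass * mask * x).mean() - x.mean()))  # ~0 up to sampling noise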