Example #1
def apply_dropout(x, level=0.5, noise_dims=None, noise_type='uniform',
                  rescale=True, name="ApplyDropout"):
  """Computes dropout.

  With probability `1 - level` (the keep probability), outputs the input
  element scaled up by `1 / (1 - level)`, otherwise outputs `0`.  The scaling
  ensures that the expected sum is unchanged.


  Parameters
  ----------
  x: A tensor.
      input tensor
  level: float (0.0-1.0)
      probability of dropping each value in the given tensor
  noise_dims: int or list(int)
      these dimensions will be set to 1 in `noise_shape`, and
      used to broadcast the dropout mask.
  noise_type: 'gaussian' (or 'normal'), 'uniform'
      distribution used for generating the dropout noise
  rescale: bool
      whether to rescale the outputs by dividing by the retain probability

  References
  ----------
  [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)

  Note
  ----
  This function only applies noise to the input when training is enabled.
  """
  shape = tf.shape(x)
  retain_prob = 1. - level
  # ====== select the noise-generating function based on noise_type ====== #
  if 'normal' in noise_type or 'gaussian' in noise_type:
    randfunc = lambda shape: tf.random_normal(shape=shape,
        mean=1.0, stddev=np.sqrt((1.0 - retain_prob) / retain_prob),
        dtype=x.dtype.base_dtype, seed=randint())
  elif 'uniform' in noise_type:
    randfunc = lambda shape: random_binomial(shape=shape,
        p=retain_prob, dtype=x.dtype.base_dtype, seed=randint())
  else:
    raise ValueError('No support for noise_type=' + noise_type)

  # ====== Dropout ====== #
  def training_fn():
    noise_shape = shape if noise_dims is None else \
        _process_noise_dim(shape, noise_dims)
    y = x * randfunc(shape=noise_shape)
    if rescale:
      y /= retain_prob
    return y

  def inference_fn():
    return x
  with tf.variable_scope(name):
    return tf.cond(is_training(), training_fn, inference_fn)
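
For reference, a minimal standalone NumPy sketch (not part of the odin API; the function name is illustrative) of the inverted-dropout idea the docstring describes: drop each element with probability `level`, then divide by the retain probability so the expected sum is unchanged.

import numpy as np

def dropout_sketch(x, level=0.5, rng=np.random.default_rng(0)):
  # keep each element with probability (1 - level) ...
  retain_prob = 1.0 - level
  mask = rng.binomial(n=1, p=retain_prob, size=x.shape)
  # ... and rescale so that the expected output equals the input element-wise
  return x * mask / retain_prob

x = np.ones(10000)
print(x.sum(), dropout_sketch(x).sum())  # the two sums agree in expectation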
Example #2
 def training_fn():
   noise_shape = (shape if noise_dims is None
                  else _process_noise_dim(shape, noise_dims))
   if 'normal' in noise_type or 'gaussian' in noise_type:
     noise = tf.random_normal(shape=noise_shape,
         mean=0.0, stddev=level, dtype=x.dtype.base_dtype, seed=randint())
   elif 'uniform' in noise_type:
     noise = tf.random_uniform(shape=noise_shape,
         minval=-level, maxval=level,
         dtype=x.dtype.base_dtype, seed=randint())
   else:
     raise ValueError('No support for noise_type=' + noise_type)
   return x + noise
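
For comparison, a self-contained NumPy sketch of the same additive-noise logic (hypothetical name, assuming `level` plays the same role as above: the standard deviation for Gaussian noise, the half-width for uniform noise):

import numpy as np

def add_noise_sketch(x, level=0.075, noise_type='uniform',
                     rng=np.random.default_rng(0)):
  if 'normal' in noise_type or 'gaussian' in noise_type:
    # zero-mean Gaussian noise with standard deviation `level`
    noise = rng.normal(loc=0.0, scale=level, size=x.shape)
  elif 'uniform' in noise_type:
    # uniform noise drawn from [-level, level]
    noise = rng.uniform(low=-level, high=level, size=x.shape)
  else:
    raise ValueError('No support for noise_type=' + noise_type)
  return x + noise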
Example #3
    def __init__(self,
                 num_units,
                 W_init=init_ops.glorot_uniform_initializer(seed=randint()),
                 b_init=init_ops.constant_initializer(0.),
                 rnn_mode='lstm',
                 num_layers=1,
                 skip_input=False,
                 is_bidirectional=False,
                 return_states=False,
                 dropout=0.,
                 **kwargs):
        super(CudnnRNN, self).__init__(**kwargs)
        # ====== defaults recurrent control ====== #
        self.num_units = int(num_units)
        self.num_layers = int(num_layers)
        self.rnn_mode = str(rnn_mode)
        self.skip_input = bool(skip_input)
        self.is_bidirectional = bool(is_bidirectional)
        self.return_states = bool(return_states)
        self.dropout = dropout

        self.W_init = W_init
        self.b_init = b_init
        if skip_input:
            wprint("`skip_input` is not supported in Tensorflow.")
Example #4
 def __init__(self, n_new_features, n_time_context,
              time_pool='max', backward=False,
              W_init=init_ops.glorot_uniform_initializer(seed=randint()),
              b_init=init_ops.constant_initializer(0),
              activation=K.linear, **kwargs):
   super(TimeDelayedDense, self).__init__(**kwargs)
   if n_new_features is None:
     self.n_new_features = []
   else:
     self.n_new_features = as_tuple(n_new_features, t=int)
   self.n_time_context = int(n_time_context)
   self.n_layers = len(self.n_new_features)
   # ====== initialization ====== #
   self.W_init = W_init
   self.b_init = b_init
   # ====== activation ====== #
   if activation is None:
     activation = K.linear
   if not isinstance(activation, (tuple, list)):
     activation = (activation,)
   activation = [K.linear if i is None else i
                 for i in activation]
   self.activation = as_tuple(activation, N=self.n_layers)
   # ====== time axis manipulation ====== #
   time_pool = str(time_pool).lower()
   assert time_pool in _allow_time_pool, \
       "Only support: %s; but given: '%s'" % (str(_allow_time_pool), str(time_pool))
   self.time_pool = time_pool
   self.backward = bool(backward)
Example #5
 def __init__(self,
              input_size,
              output_size,
              W_init=init_ops.random_uniform_initializer(seed=randint()),
              **kwargs):
     super(Embedding, self).__init__(**kwargs)
     self.input_size = input_size
     self.output_size = output_size
     self.W_init = W_init
Example #6
 def training_fn():
     noise_shape = (shape if noise_dims is None else _process_noise_dim(
         shape, noise_dims))
     if 'normal' in noise_type or 'gaussian' in noise_type:
         noise = tf.random_normal(shape=noise_shape,
                                  mean=0.0,
                                  stddev=level,
                                  dtype=x.dtype.base_dtype,
                                  seed=randint())
     elif 'uniform' in noise_type:
         noise = tf.random_uniform(shape=noise_shape,
                                   minval=-level,
                                   maxval=level,
                                   dtype=x.dtype.base_dtype,
                                   seed=randint())
     else:
         raise ValueError('No support for noise_type=' + noise_type)
     return x + noise
Example #7
 def __init__(self, n_new_features, n_time_context,
              time_pool='max', backward=False,
              W_init=init_ops.glorot_uniform_initializer(seed=randint()),
              b_init=init_ops.constant_initializer(0),
              activation=K.linear, **kwargs):
   super(TimeDelayedConv, self).__init__(**kwargs)
   self.n_new_features = int(n_new_features)
   self.n_time_context = int(n_time_context)
   self.W_init = W_init
   self.b_init = b_init
   self.activation = activation
   # ====== time axis manipulation ====== #
   time_pool = str(time_pool).lower()
   assert time_pool in _allow_time_pool, \
       "Only support: %s; but given: '%s'" % (str(_allow_time_pool), str(time_pool))
   self.time_pool = time_pool
   self.backward = bool(backward)
Example #8
File: rnn.py Project: imito/odin
  def __init__(self, num_units,
          W_init=init_ops.glorot_uniform_initializer(seed=randint()),
          b_init=init_ops.constant_initializer(0.),
          rnn_mode='lstm', num_layers=1,
          skip_input=False, is_bidirectional=False,
          return_states=False, dropout=0., **kwargs):
    super(CudnnRNN, self).__init__(**kwargs)
    # ====== defaults recurrent control ====== #
    self.num_units = int(num_units)
    self.num_layers = int(num_layers)
    self.rnn_mode = str(rnn_mode)
    self.skip_input = bool(skip_input)
    self.is_bidirectional = bool(is_bidirectional)
    self.return_states = bool(return_states)
    self.dropout = dropout

    self.W_init = W_init
    self.b_init = b_init
    if skip_input:
      wprint("`skip_input` is not supported in Tensorflow.")
Example #9
 def __init__(self,
              num_filters,
              filter_size,
              strides=1,
              pad='valid',
              W_init=init_ops.glorot_uniform_initializer(seed=randint()),
              b_init=init_ops.constant_initializer(0),
              untie_biases=False,
              activation=K.linear,
              dilation=1,
              **kwargs):
     super(Conv, self).__init__(**kwargs)
     self.num_filters = int(num_filters)
     self.filter_size = filter_size
     self.strides = strides
     self.pad = pad
     self.W_init = W_init
     self.b_init = b_init
     self.untie_biases = bool(untie_biases)
     self.dilation = dilation
     self.activation = K.linear if activation is None else activation
Example #10
 def __init__(self,
              num_filters,
              filter_size,
              strides=1,
              pad='valid',
              W_init=init_ops.glorot_uniform_initializer(seed=randint()),
              b_init=init_ops.constant_initializer(0),
              untie_biases=False,
              activation=K.linear,
              dilation=1,
              output_shape=None,
              **kwargs):
     super(TransposeConv, self).__init__(num_filters=num_filters,
                                         filter_size=filter_size,
                                         strides=strides,
                                         pad=pad,
                                         W_init=W_init,
                                         b_init=b_init,
                                         untie_biases=untie_biases,
                                         activation=activation,
                                         dilation=dilation,
                                         **kwargs)
     # explicit output shape
     self._output_shape = output_shape
Example #11
 def __init__(self, input_size, output_size,
              W_init=init_ops.random_uniform_initializer(seed=randint()), **kwargs):
   super(Embedding, self).__init__(**kwargs)
   self.input_size = input_size
   self.output_size = output_size
   self.W_init = W_init
Example #12
def randrectify(x, lower=0.3, upper=0.8, shared_axes='auto', name="RandRectify"):
  """ This function is adpated from Lasagne
  Original work Copyright (c) 2014-2015 lasagne contributors
  All rights reserved.
  LICENSE: https://github.com/Lasagne/Lasagne/blob/master/LICENSE

  Applies a randomized leaky rectify activation to x.

  The randomized leaky rectifier was first proposed and used in the Kaggle
  NDSB Competition, and later evaluated in [1]_. Compared to the standard
  leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
  for negative input during training, and a fixed slope during evaluation.

  Equation for the randomized rectifier linear unit during training:
  :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

  During evaluation, the factor is fixed to the arithmetic mean of `lower`
  and `upper`.

  Parameters
  ----------
  lower : Theano shared variable, expression, or constant
      The lower bound for the randomly chosen slopes.

  upper : Theano shared variable, expression, or constant
      The upper bound for the randomly chosen slopes.

  shared_axes : 'auto', 'all', int or tuple of int
      The axes along which the random slopes of the rectifier units are
      going to be shared. If ``'auto'`` (the default), share over all axes
      except for the second - this will share the random slope over the
      minibatch dimension for dense layers, and additionally over all
      spatial dimensions for convolutional layers. If ``'all'``, share over
      all axes, thus using a single random slope.

  References
  ----------
  .. [1] Bing Xu, Naiyan Wang et al. (2015):
     Empirical Evaluation of Rectified Activations in Convolutional Network,
     http://arxiv.org/abs/1505.00853
  """
  ndims = x.shape.ndims
  # ====== check lower and upper ====== #
  if is_variable(lower):
    add_roles(lower, ActivationParameter)
  if is_variable(upper):
    add_roles(upper, ActivationParameter)
  if not is_tensor(lower > upper) and lower > upper:
    raise ValueError("Upper bound for Randomized Rectifier needs "
                     "to be higher than lower bound.")
  # ====== check shared_axes ====== #
  if shared_axes == 'auto':
    shared_axes = (0,) + tuple(range(2, ndims))
  elif shared_axes == 'all':
    shared_axes = tuple(range(ndims))
  elif isinstance(shared_axes, int):
    shared_axes = (shared_axes,)
  else:
    shared_axes = shared_axes
  # ====== main logic ====== #
  if not is_training() or upper == lower:
    x = relu(x, alpha=(upper + lower) / 2.0)
  else: # Training mode
    # use the static shape when fully defined, otherwise fall back to the dynamic shape
    shape = x.shape.as_list()
    if builtins.any(s is None for s in shape):
      shape = tf.unstack(tf.shape(x))
    for ax in shared_axes:
      shape[ax] = 1
    rnd = tf.random_uniform(tuple(shape),
               minval=lower,
               maxval=upper,
               dtype=x.dtype.base_dtype,
               seed=randint())
    x = relu(x, rnd)
  return x
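
A minimal NumPy sketch (names are illustrative, not part of odin) of the two modes the docstring describes: during training each position gets a slope drawn from U(lower, upper) for its negative part; at evaluation time the slope is fixed to the arithmetic mean of the bounds.

import numpy as np

def rand_rectify_sketch(x, lower=0.3, upper=0.8, training=True,
                        rng=np.random.default_rng(0)):
  if training:
    # one random slope per element for the negative part
    alpha = rng.uniform(lower, upper, size=x.shape)
  else:
    # deterministic evaluation: fixed slope (lower + upper) / 2
    alpha = (lower + upper) / 2.0
  return np.where(x >= 0, x, alpha * x)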
Example #13
def apply_dropout(x,
                  level=0.5,
                  noise_dims=None,
                  noise_type='uniform',
                  rescale=True,
                  name="ApplyDropout"):
    """Computes dropout.

  With probability `1 - level` (the keep probability), outputs the input
  element scaled up by `1 / (1 - level)`, otherwise outputs `0`.  The scaling
  ensures that the expected sum is unchanged.


  Parameters
  ----------
  x: A tensor.
      input tensor
  level: float (0.0-1.0)
      probability of dropping each value in the given tensor
  noise_dims: int or list(int)
      these dimensions will be set to 1 in `noise_shape`, and
      used to broadcast the dropout mask.
  noise_type: 'gaussian' (or 'normal'), 'uniform'
      distribution used for generating the dropout noise
  rescale: bool
      whether to rescale the outputs by dividing by the retain probability

  References
  ----------
  [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)

  Note
  ----
  This function only applies noise to the input when training is enabled.
  """
    shape = tf.shape(x)
    retain_prob = 1. - level
    # ====== select the noise-generating function based on noise_type ====== #
    if 'normal' in noise_type or 'gaussian' in noise_type:
        randfunc = lambda shape: tf.random_normal(
            shape=shape,
            mean=1.0,
            stddev=np.sqrt((1.0 - retain_prob) / retain_prob),
            dtype=x.dtype.base_dtype,
            seed=randint())
    elif 'uniform' in noise_type:
        randfunc = lambda shape: random_binomial(shape=shape,
                                                 p=retain_prob,
                                                 dtype=x.dtype.base_dtype,
                                                 seed=randint())
    else:
        raise ValueError('No support for noise_type=' + noise_type)

    # ====== Dropout ====== #
    def training_fn():
        noise_shape = shape if noise_dims is None else \
            _process_noise_dim(shape, noise_dims)
        y = x * randfunc(shape=noise_shape)
        if rescale:
            y /= retain_prob
        return y

    def inference_fn():
        return x

    with tf.variable_scope(name):
        return tf.cond(is_training(), training_fn, inference_fn)
Example #14
def randrectify(x,
                lower=0.3,
                upper=0.8,
                shared_axes='auto',
                name="RandRectify"):
    """ This function is adpated from Lasagne
  Original work Copyright (c) 2014-2015 lasagne contributors
  All rights reserved.
  LICENSE: https://github.com/Lasagne/Lasagne/blob/master/LICENSE

  Applies a randomized leaky rectify activation to x.

  The randomized leaky rectifier was first proposed and used in the Kaggle
  NDSB Competition, and later evaluated in [1]_. Compared to the standard
  leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
  for negative input during training, and a fixed slope during evaluation.

  Equation for the randomized rectifier linear unit during training:
  :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

  During evaluation, the factor is fixed to the arithmetic mean of `lower`
  and `upper`.

  Parameters
  ----------
  lower : Theano shared variable, expression, or constant
      The lower bound for the randomly chosen slopes.

  upper : Theano shared variable, expression, or constant
      The upper bound for the randomly chosen slopes.

  shared_axes : 'auto', 'all', int or tuple of int
      The axes along which the random slopes of the rectifier units are
      going to be shared. If ``'auto'`` (the default), share over all axes
      except for the second - this will share the random slope over the
      minibatch dimension for dense layers, and additionally over all
      spatial dimensions for convolutional layers. If ``'all'``, share over
      all axes, thus using a single random slope.

  References
  ----------
  .. [1] Bing Xu, Naiyan Wang et al. (2015):
     Empirical Evaluation of Rectified Activations in Convolutional Network,
     http://arxiv.org/abs/1505.00853
  """
    ndims = x.shape.ndims
    # ====== check lower and upper ====== #
    if is_variable(lower):
        add_roles(lower, ActivationParameter)
    if is_variable(upper):
        add_roles(upper, ActivationParameter)
    if not is_tensor(lower > upper) and lower > upper:
        raise ValueError("Upper bound for Randomized Rectifier needs "
                         "to be higher than lower bound.")
    # ====== check shared_axes ====== #
    if shared_axes == 'auto':
        shared_axes = (0, ) + tuple(range(2, ndims))
    elif shared_axes == 'all':
        shared_axes = tuple(range(ndims))
    elif isinstance(shared_axes, int):
        shared_axes = (shared_axes, )
    else:
        shared_axes = shared_axes
    # ====== main logic ====== #
    if not is_training() or upper == lower:
        x = relu(x, alpha=(upper + lower) / 2.0)
    else:  # Training mode
        # use the static shape when fully defined, otherwise fall back to the dynamic shape
        shape = x.shape.as_list()
        if builtins.any(s is None for s in shape):
            shape = tf.unstack(tf.shape(x))
        for ax in shared_axes:
            shape[ax] = 1
        rnd = tf.random_uniform(tuple(shape),
                                minval=lower,
                                maxval=upper,
                                dtype=x.dtype.base_dtype,
                                seed=randint())
        x = relu(x, rnd)
    return x