Ejemplo n.º 1
0
  def testNormalizeDataFormat(self):
    """normalize_data_format lowercases known names and rejects others."""
    self.assertEqual(
        'channels_last', utils.normalize_data_format('Channels_Last'))
    self.assertEqual(
        'channels_first', utils.normalize_data_format('CHANNELS_FIRST'))
    # Anything outside the two canonical names must raise.
    self.assertRaises(ValueError, utils.normalize_data_format, 'invalid')
Ejemplo n.º 2
0
    def testNormalizeDataFormat(self):
        """Mixed-case data-format strings normalize to their lowercase form."""
        for raw, expected in (('Channels_Last', 'channels_last'),
                              ('CHANNELS_FIRST', 'channels_first')):
            self.assertEqual(expected, utils.normalize_data_format(raw))

        # Unknown names are rejected outright.
        with self.assertRaises(ValueError):
            utils.normalize_data_format('invalid')
Ejemplo n.º 3
0
 def __init__(self, rank,
              filters,
              kernel_size,
              strides=1,
              padding='valid',
              data_format='channels_last',
              dilation_rate=1,
              activation=None,
              use_bias=True,
              kernel_initializer=None,
              bias_initializer=init_ops.zeros_initializer(),
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              trainable=True,
              name=None,
              **kwargs):
   """Abstract N-D convolution base: normalizes and stores configuration.

   Args:
     rank: Integer, the spatial dimensionality of the convolution.
     filters: Integer, number of output filters.
     kernel_size: Int or tuple of `rank` ints, kernel window size.
     strides: Int or tuple of `rank` ints, convolution strides.
     padding: Padding mode accepted by `utils.normalize_padding`
       (e.g. 'valid').
     data_format: String accepted by `utils.normalize_data_format`
       (e.g. 'channels_last').
     dilation_rate: Int or tuple of `rank` ints, dilation rate.
     activation: Activation callable, or None for a linear layer.
     use_bias: Boolean, whether a bias term is used.
     kernel_initializer: Initializer for the kernel, or None.
     bias_initializer: Initializer for the bias.
     kernel_regularizer: Regularizer for the kernel, or None.
     bias_regularizer: Regularizer for the bias, or None.
     activity_regularizer: Regularizer for the output, or None.
     trainable: Boolean, forwarded to the base layer.
     name: Optional layer name, forwarded to the base layer.
     **kwargs: Extra keyword arguments for the base layer.
   """
   super(_Conv, self).__init__(trainable=trainable,
                               name=name, **kwargs)
   self.rank = rank
   self.filters = filters
   # Scalar-or-tuple geometry arguments become rank-length tuples.
   self.kernel_size = utils.normalize_tuple(kernel_size, rank, 'kernel_size')
   self.strides = utils.normalize_tuple(strides, rank, 'strides')
   self.padding = utils.normalize_padding(padding)
   self.data_format = utils.normalize_data_format(data_format)
   self.dilation_rate = utils.normalize_tuple(
       dilation_rate, rank, 'dilation_rate')
   self.activation = activation
   self.use_bias = use_bias
   self.kernel_initializer = kernel_initializer
   self.bias_initializer = bias_initializer
   self.kernel_regularizer = kernel_regularizer
   self.bias_regularizer = bias_regularizer
   self.activity_regularizer = activity_regularizer
 def __init__(self, size=(2, 2), data_format=None, **kwargs):
   """Bilinear 2-D upsampling config: resolves size and data format.

   Args:
     size: Int or 2-tuple, upsampling factors for rows and columns.
     data_format: 'channels_first'/'channels_last', or None to use the
       Keras global image data format.
     **kwargs: Extra keyword arguments for the base layer.
   """
   super(BilinearUpSampling2D, self).__init__(**kwargs)
   # Fall back to the backend-wide default when no format is given.
   self.data_format = (K.image_data_format() if data_format is None
                       else conv_utils.normalize_data_format(data_format))
   self.size = conv_utils.normalize_tuple(size, 2, 'size')
   # Only 4-D inputs are accepted.
   self.input_spec = InputSpec(ndim=4)
Ejemplo n.º 5
0
 def __init__(self, pool_function, pool_size, strides,
              padding='valid', data_format='channels_last',
              name=None, **kwargs):
   """Shared 1-D pooling base: records the normalized pooling config.

   Args:
     pool_function: Callable implementing the pooling operation.
     pool_size: Int or 1-tuple, size of the pooling window.
     strides: Int or 1-tuple, pooling strides.
     padding: Padding mode accepted by `utils.normalize_padding`.
     data_format: 'channels_last' or 'channels_first'.
     name: Optional layer name.
     **kwargs: Extra keyword arguments for the base layer.
   """
   super(_Pooling1D, self).__init__(name=name, **kwargs)
   self.pool_function = pool_function
   self.padding = utils.normalize_padding(padding)
   self.data_format = utils.normalize_data_format(data_format)
   # Window geometry normalized to 1-tuples.
   self.pool_size = utils.normalize_tuple(pool_size, 1, 'pool_size')
   self.strides = utils.normalize_tuple(strides, 1, 'strides')
Ejemplo n.º 6
0
 def __init__(self, pool_function, pool_size, strides,
              padding='valid', data_format='channels_last',
              name=None, **kwargs):
   """Abstract 1-D pooling constructor; normalizes every argument once.

   Args:
     pool_function: Callable that performs the pooling.
     pool_size: Int or 1-tuple pooling window size.
     strides: Int or 1-tuple pooling strides.
     padding: Padding mode understood by `utils.normalize_padding`.
     data_format: 'channels_last' or 'channels_first'.
     name: Optional layer name.
     **kwargs: Forwarded to the base layer.
   """
   super(_Pooling1D, self).__init__(name=name, **kwargs)
   self.pool_function = pool_function
   as_tuple = utils.normalize_tuple
   self.pool_size = as_tuple(pool_size, 1, 'pool_size')
   self.strides = as_tuple(strides, 1, 'strides')
   self.padding = utils.normalize_padding(padding)
   self.data_format = utils.normalize_data_format(data_format)
Ejemplo n.º 7
0
 def __init__(self,
              rank,
              filters,
              kernel_support,
              corr=False,
              strides_down=1,
              strides_up=1,
              padding="valid",
              extra_pad_end=True,
              channel_separable=False,
              data_format="channels_last",
              activation=None,
              use_bias=False,
              kernel_initializer=init_ops.VarianceScaling(),
              bias_initializer=init_ops.Zeros(),
              kernel_regularizer=None,
              bias_regularizer=None,
              kernel_parameterizer=parameterizers.RDFTParameterizer(),
              bias_parameterizer=None,
              **kwargs):
     """N-D signal convolution base: coerces and stores all configuration.

     Args:
       rank: Spatial dimensionality of the convolution (coerced to int).
       filters: Number of output filters (coerced to int).
       kernel_support: Int or tuple of `rank` ints, kernel support size.
       corr: Boolean flag stored on the layer (presumably selects
         correlation vs. convolution elsewhere — confirm at call sites).
       strides_down: Int or tuple of `rank` ints, downsampling strides.
       strides_up: Int or tuple of `rank` ints, upsampling strides.
       padding: One of "valid", "same_zeros" or "same_reflect"
         (case-insensitive).
       extra_pad_end: Boolean flag stored on the layer.
       channel_separable: Boolean flag stored on the layer.
       data_format: "channels_last" or "channels_first".
       activation: Activation callable, or None.
       use_bias: Boolean, whether a bias term is added.
       kernel_initializer: Initializer for the kernel.
       bias_initializer: Initializer for the bias.
       kernel_regularizer: Regularizer for the kernel, or None.
       bias_regularizer: Regularizer for the bias, or None.
       kernel_parameterizer: Parameterizer object stored for the kernel.
       bias_parameterizer: Parameterizer object stored for the bias.
       **kwargs: Extra keyword arguments for the base layer.

     Raises:
       ValueError: If `padding` is not one of the supported modes.
     """
     super(_SignalConv, self).__init__(**kwargs)
     self._rank = int(rank)
     self._filters = int(filters)
     self._kernel_support = utils.normalize_tuple(kernel_support,
                                                  self._rank,
                                                  "kernel_support")
     self._corr = bool(corr)
     self._strides_down = utils.normalize_tuple(strides_down, self._rank,
                                                "strides_down")
     self._strides_up = utils.normalize_tuple(strides_up, self._rank,
                                              "strides_up")
     self._padding = str(padding).lower()
     try:
         # Map the user-facing padding name to a tf.pad mode; "valid"
         # needs no explicit padding, hence None.
         # NOTE(review): this reads `self.padding`, so a `padding`
         # property exposing `_padding` is assumed to exist elsewhere in
         # the class — verify.
         self._pad_mode = {
             "valid": None,
             "same_zeros": "CONSTANT",
             "same_reflect": "REFLECT",
         }[self.padding]
     except KeyError:
         raise ValueError("Unsupported padding mode: '{}'".format(padding))
     self._extra_pad_end = bool(extra_pad_end)
     self._channel_separable = bool(channel_separable)
     self._data_format = utils.normalize_data_format(data_format)
     self._activation = activation
     self._use_bias = bool(use_bias)
     self._kernel_initializer = kernel_initializer
     self._bias_initializer = bias_initializer
     self._kernel_regularizer = kernel_regularizer
     self._bias_regularizer = bias_regularizer
     self._kernel_parameterizer = kernel_parameterizer
     self._bias_parameterizer = bias_parameterizer
     # Inputs carry rank spatial dims plus batch and channel dims.
     self.input_spec = base.InputSpec(ndim=self._rank + 2)
Ejemplo n.º 8
0
 def __init__(self,
              rank,
              filters,
              kernel_size,
              strides=1,
              padding='valid',
              data_format='channels_last',
              dilation_rate=1,
              activation=None,
              use_scale=True,
              use_bias=True,
              kernel_initializer=None,
              scale_initializer=None,
              bias_initializer=init_ops.zeros_initializer(),
              scale_regularizer=None,
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              scale_constraint=None,
              bias_constraint=None,
              trainable=True,
              name=None,
              **kwargs):
     """Weight-normalized N-D convolution base: stores normalized config.

     Geometry arguments (`kernel_size`, `strides`, `dilation_rate`) are
     normalized to rank-length tuples; initializers, regularizers and
     constraints are stored as given for later variable creation.
     """
     super(_ConvWNorm, self).__init__(
         trainable=trainable,
         name=name,
         activity_regularizer=activity_regularizer,
         **kwargs)
     self.rank = rank
     self.filters = filters
     # Normalize scalar-or-tuple geometry to rank-length tuples.
     self.kernel_size = utils.normalize_tuple(
         kernel_size, rank, 'kernel_size')
     self.strides = utils.normalize_tuple(strides, rank, 'strides')
     self.dilation_rate = utils.normalize_tuple(
         dilation_rate, rank, 'dilation_rate')
     self.padding = utils.normalize_padding(padding)
     self.data_format = utils.normalize_data_format(data_format)
     self.activation = activation
     self.use_scale = use_scale
     self.use_bias = use_bias
     # Initializers, regularizers and constraints, stored as given.
     self.kernel_initializer = kernel_initializer
     self.scale_initializer = scale_initializer
     self.bias_initializer = bias_initializer
     self.kernel_regularizer = kernel_regularizer
     self.scale_regularizer = scale_regularizer
     self.bias_regularizer = bias_regularizer
     self.kernel_constraint = kernel_constraint
     self.scale_constraint = scale_constraint
     self.bias_constraint = bias_constraint
     # Inputs carry rank spatial dims plus batch and channel dims.
     self.input_spec = base.InputSpec(ndim=self.rank + 2)
Ejemplo n.º 9
0
 def __init__(
     self,
     rank,
     filters,
     kernel_size,
     is_mc,
     strides=1,
     padding="valid",
     data_format="channels_last",
     dilation_rate=1,
     activation=None,
     activity_regularizer=None,
     kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(),
     kernel_posterior_tensor_fn=lambda d: d.sample(),
     kernel_prior_fn=tfp_layers_util.default_multivariate_normal_fn,
     kernel_divergence_fn=(lambda q, p, ignore: kl_lib.kl_divergence(q, p)),
     bias_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(
         is_singular=True
     ),
     bias_posterior_tensor_fn=lambda d: d.sample(),
     bias_prior_fn=None,
     bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
     **kwargs
 ):
     """Variational convolution base: normalizes geometry, stores callables.

     Args:
       rank: Integer, spatial dimensionality of the convolution.
       filters: Integer, number of output filters.
       kernel_size: Int or tuple of `rank` ints, kernel window size.
       is_mc: Flag stored on the layer; presumably selects Monte Carlo
         behavior elsewhere — confirm at call sites.
       strides: Int or tuple of `rank` ints, convolution strides.
       padding: Padding mode accepted by `normalize_padding`.
       data_format: "channels_last" or "channels_first".
       dilation_rate: Int or tuple of `rank` ints, dilation rate.
       activation: Activation identifier or callable, resolved via
         `tf.keras.activations.get`.
       activity_regularizer: Regularizer for the output, or None.
       kernel_posterior_fn / kernel_posterior_tensor_fn / kernel_prior_fn /
         kernel_divergence_fn / bias_posterior_fn /
         bias_posterior_tensor_fn / bias_prior_fn / bias_divergence_fn:
         Callables stored unmodified; used by build/call logic defined
         elsewhere in the class. Defaults sample from mean-field normals
         and compute KL(q, p), ignoring the third argument.
       **kwargs: Extra keyword arguments for the base layer.
     """
     super(_ConvVariational, self).__init__(
         activity_regularizer=activity_regularizer, **kwargs
     )
     self.rank = rank
     self.is_mc = is_mc
     self.filters = filters
     # Scalar-or-tuple geometry arguments become rank-length tuples.
     self.kernel_size = tf_layers_util.normalize_tuple(
         kernel_size, rank, "kernel_size"
     )
     self.strides = tf_layers_util.normalize_tuple(strides, rank, "strides")
     self.padding = tf_layers_util.normalize_padding(padding)
     self.data_format = tf_layers_util.normalize_data_format(data_format)
     self.dilation_rate = tf_layers_util.normalize_tuple(
         dilation_rate, rank, "dilation_rate"
     )
     self.activation = tf.keras.activations.get(activation)
     # Inputs carry rank spatial dims plus batch and channel dims.
     self.input_spec = tf.keras.layers.InputSpec(ndim=self.rank + 2)
     self.kernel_posterior_fn = kernel_posterior_fn
     self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
     self.kernel_prior_fn = kernel_prior_fn
     self.kernel_divergence_fn = kernel_divergence_fn
     self.bias_posterior_fn = bias_posterior_fn
     self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
     self.bias_prior_fn = bias_prior_fn
     self.bias_divergence_fn = bias_divergence_fn
Ejemplo n.º 10
0
 def __init__(self,
              pool_function,
              pool_size,
              strides,
              padding='valid',
              data_format='channels_last',
              name=None,
              quantizer=None,
              **kwargs):
     """2-D pooling base with optional quantizer: records configuration.

     Args:
       pool_function: Callable implementing the pooling operation.
       pool_size: Int or 2-tuple, pooling window size.
       strides: Int or 2-tuple, pooling strides.
       padding: Padding mode accepted by `utils.normalize_padding`.
       data_format: 'channels_last' or 'channels_first'.
       name: Optional layer name.
       quantizer: Optional quantizer object, stored as given.
       **kwargs: Extra keyword arguments for the base layer.
     """
     super(_Pooling2D, self).__init__(name=name, **kwargs)
     self.pool_function = pool_function
     self.padding = utils.normalize_padding(padding)
     self.data_format = utils.normalize_data_format(data_format)
     # Window geometry normalized to 2-tuples.
     self.pool_size = utils.normalize_tuple(pool_size, 2, 'pool_size')
     self.strides = utils.normalize_tuple(strides, 2, 'strides')
     # Only 4-D inputs are accepted.
     self.input_spec = base.InputSpec(ndim=4)
     self.quantizer = quantizer
Ejemplo n.º 11
0
    def __init__(self,
                 rank,
                 filters,
                 kernel_size,
                 strides=1,
                 padding="valid",
                 data_format="channels_last",
                 dilation_rate=1,
                 activation=None,
                 use_bias=True,
                 dropout_rate=0.5,
                 temperature=0.6,
                 gamma=-0.1,
                 zeta=1.1,
                 kernel_initializer=init.random_normal_initializer(0., 1e-2),
                 bias_initializer=init.zeros_initializer(),
                 trainable=True,
                 name=None,
                 **kwargs):
        """L0-norm convolution base: stores config, builds gate initializer.

        Args:
          rank: Integer, spatial dimensionality of the convolution.
          filters: Integer, number of output filters.
          kernel_size: Int or tuple of `rank` ints, kernel window size.
          strides: Int or tuple of `rank` ints, convolution strides.
          padding: Padding mode accepted by `utils.normalize_padding`.
          data_format: "channels_last" or "channels_first".
          dilation_rate: Int or tuple of `rank` ints, dilation rate.
          activation: Activation callable, or None.
          use_bias: Boolean, whether a bias term is used.
          dropout_rate: Target dropout rate; also determines the mean of
            the log-alpha initializer below.
          temperature: Stored hyperparameter (gate semantics not visible
            here — see class usage).
          gamma: Stored hyperparameter, default -0.1.
          zeta: Stored hyperparameter, default 1.1.
          kernel_initializer: Initializer for the kernel.
          bias_initializer: Initializer for the bias.
          trainable: Forwarded to the base layer.
          name: Optional layer name.
          **kwargs: Extra keyword arguments for the base layer.
        """
        super(_L0NormConv, self).__init__(trainable=trainable,
                                          name=name,
                                          **kwargs)
        self.rank = rank
        self.filters = filters
        # Scalar-or-tuple geometry arguments become rank-length tuples.
        self.kernel_size = utils.normalize_tuple(kernel_size, rank,
                                                 "kernel_size")
        self.strides = utils.normalize_tuple(strides, rank, "strides")
        self.padding = utils.normalize_padding(padding)
        self.data_format = utils.normalize_data_format(data_format)
        self.dilation_rate = utils.normalize_tuple(dilation_rate, rank,
                                                   "dilation_rate")
        self.activation = activation
        self.use_bias = use_bias
        self.dropout_rate = dropout_rate
        self.temperature = temperature
        self.gamma = gamma
        self.zeta = zeta
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer

        # Construct log_alpha initializer.
        # alpha is the dropout odds ratio; values are drawn around it with
        # stddev 0.01.
        # NOTE(review): despite the attribute name, the mean passed here is
        # alpha itself, not log(alpha) — confirm this is intentional.
        alpha = dropout_rate / (1. - dropout_rate)
        self.log_alpha_initializer = init.random_normal_initializer(
            alpha, 0.01)
Ejemplo n.º 12
0
 def __init__(self,
              rank,
              filters,
              kernel_size,
              strides=1,
              padding='valid',
              data_format='channels_last',
              dilation_rate=1,
              activation=None,
              use_bias=True,
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros',
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              bias_constraint=None,
              trainable=True,
              name=None,
              **kwargs):
   """Masked convolution base: resolves and stores configuration.

   Initializers, regularizers and constraints may be identifiers or
   objects; each is resolved through its framework `get` helper.
   """
   super(_MaskedConv, self).__init__(
       trainable=trainable,
       name=name,
       activity_regularizer=activity_regularizer,
       **kwargs)
   self.rank = rank
   self.filters = filters
   self.activation = activation
   self.use_bias = use_bias
   # Scalar-or-tuple geometry arguments become rank-length tuples.
   self.kernel_size = utils.normalize_tuple(kernel_size, rank, 'kernel_size')
   self.strides = utils.normalize_tuple(strides, rank, 'strides')
   self.dilation_rate = utils.normalize_tuple(dilation_rate, rank,
                                              'dilation_rate')
   self.padding = utils.normalize_padding(padding)
   self.data_format = utils.normalize_data_format(data_format)
   # Resolve the constant 'ones'/'zeros' initializers once up front.
   self.ones_initializer = initializers.get('ones')
   self.zeros_initializer = initializers.get('zeros')
   self.kernel_initializer = initializers.get(kernel_initializer)
   self.bias_initializer = initializers.get(bias_initializer)
   self.kernel_regularizer = regularizers.get(kernel_regularizer)
   self.bias_regularizer = regularizers.get(bias_regularizer)
   self.kernel_constraint = constraints.get(kernel_constraint)
   self.bias_constraint = constraints.get(bias_constraint)
   # Inputs carry rank spatial dims plus batch and channel dims.
   self.input_spec = InputSpec(ndim=self.rank + 2)
Ejemplo n.º 13
0
 def __init__(self,
              rank,
              filters,
              kernel_size,
              strides=1,
              padding="valid",
              data_format="channels_last",
              dilation_rate=1,
              activation=None,
              use_bias=True,
              trainable=True,
              local_reparametrization=False,
              flipout=False,
              seed=None,
              name=None,
              **kwargs):
     """Variational convolution base constructor.

     Normalizes the convolution geometry and records the requested
     variance-reduction settings.

     Raises:
       ValueError: If both `local_reparametrization` and `flipout` are
         requested at the same time.
     """
     super(_ConvVariational, self).__init__(trainable=trainable,
                                            name=name,
                                            **kwargs)
     # The two variance-reduction schemes are mutually exclusive.
     if flipout and local_reparametrization:
         raise ValueError('Cannot apply both flipout and local '
                          'reparametrizations for variance reduction.')
     self.rank = rank
     self.filters = filters
     self.activation = activation
     self.use_bias = use_bias
     # Scalar-or-tuple geometry arguments become rank-length tuples.
     self.kernel_size = utils.normalize_tuple(kernel_size, rank,
                                              "kernel_size")
     self.strides = utils.normalize_tuple(strides, rank, "strides")
     self.dilation_rate = utils.normalize_tuple(dilation_rate, rank,
                                                "dilation_rate")
     self.padding = utils.normalize_padding(padding)
     self.data_format = utils.normalize_data_format(data_format)
     self.local_reparametrization = local_reparametrization
     self.flipout = flipout
     self.seed = seed
Ejemplo n.º 14
0
    def __init__(
            self,
            filters,
            dau_units,
            max_kernel_size,
            strides=1,
            data_format='channels_first',
            activation=None,
            use_bias=True,
            weight_initializer=init_ops.random_normal_initializer(stddev=0.1),
            mu1_initializer=None,
            mu2_initializer=None,
            sigma_initializer=None,
            bias_initializer=init_ops.zeros_initializer(),
            weight_regularizer=None,
            mu1_regularizer=None,
            mu2_regularizer=None,
            sigma_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            weight_constraint=None,
            mu1_constraint=None,
            mu2_constraint=None,
            sigma_constraint=None,
            bias_constraint=None,
            trainable=True,
            mu_learning_rate_factor=500,
            dau_unit_border_bound=0.01,
            dau_unit_single_dim=False,
            dau_aggregation_forbid_positive_dim1=False,
            unit_testing=False,  # for compatibility between CPU and GPU version (where gradients of last edge need to be ignored) during unit testing
            name=None,
            **kwargs):
        """Displaced-Aggregation-Unit (DAU) 2-D convolution configuration.

        Normalizes the DAU unit counts, selects default mu/sigma
        initializers when none are given, and pads the number of DAU units
        up to a multiple of `DAU_UNITS_GROUP` (the extra dummy units get
        zero weights via `ZeroNLast` so they contribute nothing).
        """
        super(DAUConv2d,
              self).__init__(trainable=trainable,
                             name=name,
                             activity_regularizer=activity_regularizer,
                             **kwargs)
        self.rank = 2
        self.filters = filters
        self.dau_units = utils.normalize_tuple(dau_units, self.rank,
                                               'dau_components')
        self.max_kernel_size = max_kernel_size
        # Padding is half the max kernel size (floor) — presumably
        # 'same'-style; confirm against the conv implementation.
        self.padding = np.floor(self.max_kernel_size / 2.0)
        self.strides = strides
        self.data_format = utils.normalize_data_format(data_format)
        self.activation = activation
        self.use_bias = use_bias
        self.bias_initializer = bias_initializer
        self.bias_regularizer = bias_regularizer
        self.bias_constraint = bias_constraint

        self.weight_initializer = weight_initializer
        self.weight_regularizer = weight_regularizer
        self.weight_constraint = weight_constraint

        self.mu1_initializer = mu1_initializer
        self.mu1_regularizer = mu1_regularizer
        self.mu1_constraint = mu1_constraint

        self.mu2_initializer = mu2_initializer
        self.mu2_regularizer = mu2_regularizer
        self.mu2_constraint = mu2_constraint

        self.sigma_initializer = sigma_initializer
        self.sigma_regularizer = sigma_regularizer
        self.sigma_constraint = sigma_constraint

        # Default mu initializers place DAU means on a grid within the
        # kernel bounds (axis 2 for mu1, axis 1 for mu2).
        if self.mu1_initializer is None:
            self.mu1_initializer = DAUGridMean(
                dau_units=self.dau_units,
                max_value=np.floor(self.max_kernel_size / 2.0) - 1,
                dau_unit_axis=2)
        if self.mu2_initializer is None:
            self.mu2_initializer = DAUGridMean(
                dau_units=self.dau_units,
                max_value=np.floor(self.max_kernel_size / 2.0) - 1,
                dau_unit_axis=1)

        if self.sigma_initializer is None:
            self.sigma_initializer = init_ops.constant_initializer(0.5)

        self.mu_learning_rate_factor = mu_learning_rate_factor

        self.unit_testing = unit_testing

        self.input_spec = base.InputSpec(ndim=self.rank + 2)

        self.dau_unit_border_bound = dau_unit_border_bound
        self.num_dau_units_all = np.int32(np.prod(self.dau_units))
        self.num_dau_units_ignore = 0

        self.dau_unit_single_dim = dau_unit_single_dim
        self.dau_aggregation_forbid_positive_dim1 = dau_aggregation_forbid_positive_dim1
        # if we have less than 2 units per channel or an odd number of them then add one more dummy unit
        # since computation is always done with 2 units at the same time (effectively set weight=0 for those dummy units)

        # make sure we have at least ALLOWED_UNITS_GROUP (this is requested so for fast version that can handle only factor of 2)
        if self.num_dau_units_all % self.DAU_UNITS_GROUP != 0:
            new_num_units = np.int32(
                np.ceil(self.num_dau_units_all / float(self.DAU_UNITS_GROUP)) *
                self.DAU_UNITS_GROUP)

            self.num_dau_units_ignore = new_num_units - self.num_dau_units_all

            # Grow the smaller dau_units dimension by the dummy-unit count.
            if self.dau_units[0] < self.dau_units[1]:
                self.dau_units = (self.dau_units[0] +
                                  self.num_dau_units_ignore, self.dau_units[1])
            else:
                self.dau_units = (self.dau_units[0], self.dau_units[1] +
                                  self.num_dau_units_ignore)

            self.num_dau_units_all = new_num_units

            # Dummy units must contribute nothing: zero their weights.
            self.weight_initializer = ZeroNLast(
                self.weight_initializer,
                last_num_to_zero=self.num_dau_units_ignore,
                axis=2)

        # Variables are assigned later; None until then.
        self.dau_weights = None
        self.dau_mu1 = None
        self.dau_mu2 = None
        self.dau_sigma = None

        # show notice when using stride>1 that this is not implemented by CUDA code and is only emulating it (will have same computational requirements as for stride=1)
        if self.strides > 1:
            tf.logging.warning(
                'NOTICE: using stride>=2 in DAU convolution uses the same computational resources as with '
                +
                'stride=1 (current implementation only emulates stride>=2 using tensor slicing).'
            )
Ejemplo n.º 15
0
    def __init__(self, filters,
                 dau_units,
                 max_kernel_size,
                 strides=1,
                 data_format='channels_first',
                 activation=None,
                 use_bias=True,
                 weight_initializer=init_ops.random_normal_initializer(stddev=0.1),
                 mu1_initializer=None,
                 mu2_initializer=None,
                 sigma_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 weight_regularizer=None,
                 mu1_regularizer=None,
                 mu2_regularizer=None,
                 sigma_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 weight_constraint=None,
                 mu1_constraint=None,
                 mu2_constraint=None,
                 sigma_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 mu_learning_rate_factor=500,
                 dau_unit_border_bound=0.01,
                 dau_sigma_trainable=False,
                 name=None,
                 **kwargs):
        """Pure-TF DAU 2-D convolution layer: stores normalized configuration.

        Args:
          filters: Number of output channels.
          dau_units: Int or 2-tuple, number of DAU components per dimension.
          max_kernel_size: Maximum kernel size; padding is floor(size / 2).
          strides: Convolution stride (stored as given).
          data_format: 'channels_first' or 'channels_last'.
          activation: Activation callable, or None.
          use_bias: Whether a bias term is used.
          weight_initializer / mu1_initializer / mu2_initializer /
            sigma_initializer / bias_initializer: Variable initializers.
            `mu1_initializer` and `mu2_initializer` are mandatory.
          *_regularizer / *_constraint: Optional per-variable regularizers
            and constraints, stored as given.
          trainable: Forwarded to the base layer.
          mu_learning_rate_factor: Stored factor for the mu learning rate
            (applied elsewhere).
          dau_unit_border_bound: Stored bound for DAU means near the border.
          dau_sigma_trainable: Whether the sigma variable is trainable.
          name: Optional layer name.
          **kwargs: Extra keyword arguments for the base layer.

        Raises:
          ValueError: If `mu1_initializer` or `mu2_initializer` is None.
        """
        super(DAUConv2dTF, self).__init__(trainable=trainable, name=name,
                                    activity_regularizer=activity_regularizer,
                                    **kwargs)
        self.rank = 2
        self.filters = filters
        self.dau_units = utils.normalize_tuple(dau_units, self.rank, 'dau_components')
        self.max_kernel_size = max_kernel_size
        # Padding is half the max kernel size (floor).
        self.padding = np.floor(self.max_kernel_size / 2.0)
        self.strides = strides
        self.data_format = utils.normalize_data_format(data_format)
        self.activation = activation
        self.use_bias = use_bias
        self.bias_initializer = bias_initializer
        self.bias_regularizer = bias_regularizer
        self.bias_constraint = bias_constraint

        self.weight_initializer = weight_initializer
        self.weight_regularizer = weight_regularizer
        self.weight_constraint = weight_constraint

        self.mu1_initializer = mu1_initializer
        self.mu1_regularizer = mu1_regularizer
        self.mu1_constraint = mu1_constraint

        self.mu2_initializer = mu2_initializer
        self.mu2_regularizer = mu2_regularizer
        self.mu2_constraint = mu2_constraint

        self.sigma_initializer = sigma_initializer
        self.sigma_regularizer = sigma_regularizer
        self.sigma_constraint = sigma_constraint

        # Fix: raise ValueError (was a bare Exception) so callers can catch
        # invalid-argument errors precisely; still caught by `except Exception`.
        if self.mu1_initializer is None:
            raise ValueError("Must initialize MU1")
        if self.mu2_initializer is None:
            raise ValueError("Must initialize MU2")

        if self.sigma_initializer is None:
            self.sigma_initializer = init_ops.constant_initializer(0.5)

        self.mu_learning_rate_factor = mu_learning_rate_factor

        self.input_spec = base.InputSpec(ndim=self.rank + 2)

        self.dau_unit_border_bound = dau_unit_border_bound
        self.num_dau_units_all = np.int32(np.prod(self.dau_units))

        # Variables are assigned later; None until then.
        self.dau_weights = None
        self.dau_mu1 = None
        self.dau_mu2 = None
        self.dau_sigma = None

        self.dau_sigma_trainable = dau_sigma_trainable
Ejemplo n.º 16
0
    def __init__(
            self,
            filters,
            dau_units,
            max_kernel_size,
            strides=1,
            data_format='channels_first',
            activation=None,
            use_bias=True,
            weight_initializer=init_ops.random_normal_initializer(stddev=0.1),
            mu1_initializer=None,
            mu2_initializer=None,
            sigma_initializer=None,
            bias_initializer=init_ops.zeros_initializer(),
            weight_regularizer=None,
            mu1_regularizer=None,
            mu2_regularizer=None,
            sigma_regularizer=None,
            bias_regularizer=None,
            activity_regularizer=None,
            weight_constraint=None,
            mu1_constraint=None,
            mu2_constraint=None,
            sigma_constraint=None,
            bias_constraint=None,
            trainable=True,
            mu_learning_rate_factor=500,
            unit_testing=False,  # for compatibility between CPU and GPU version (where gradients of last edge need to be ignored) during unit testing
            name=None,
            **kwargs):
        """DAU 2-D convolution configuration with per-dimension kernel size.

        `max_kernel_size` and `strides` are normalized to 2-tuples and
        padding is computed per dimension. DAU units are padded up to a
        multiple of `DAU_UNITS_GROUP` with zero-weight dummy units.
        """
        super(DAUConv2d,
              self).__init__(trainable=trainable,
                             name=name,
                             activity_regularizer=activity_regularizer,
                             **kwargs)
        self.rank = 2
        self.filters = filters
        self.dau_units = utils.normalize_tuple(dau_units, self.rank,
                                               'dau_components')
        self.max_kernel_size = utils.normalize_tuple(max_kernel_size,
                                                     self.rank,
                                                     'max_kernel_size')
        # Per-dimension padding: half the kernel size (floor).
        self.padding = list(
            map(lambda x: np.floor(x / 2.0), self.max_kernel_size))
        self.strides = utils.normalize_tuple(strides, self.rank, 'strides')
        self.data_format = utils.normalize_data_format(data_format)
        self.activation = activation
        self.use_bias = use_bias
        self.bias_initializer = bias_initializer
        self.bias_regularizer = bias_regularizer
        self.bias_constraint = bias_constraint

        self.weight_initializer = weight_initializer
        self.weight_regularizer = weight_regularizer
        self.weight_constraint = weight_constraint

        self.mu1_initializer = mu1_initializer
        self.mu1_regularizer = mu1_regularizer
        self.mu1_constraint = mu1_constraint

        self.mu2_initializer = mu2_initializer
        self.mu2_regularizer = mu2_regularizer
        self.mu2_constraint = mu2_constraint

        self.sigma_initializer = sigma_initializer
        self.sigma_regularizer = sigma_regularizer
        self.sigma_constraint = sigma_constraint

        # Default mu initializers place DAU means on a grid within the
        # kernel bounds; note mu1 uses kernel dim [1], mu2 uses dim [0].
        if self.mu1_initializer is None:
            self.mu1_initializer = DAUGridMean(
                dau_units=self.dau_units,
                max_value=np.floor(self.max_kernel_size[1] / 2.0) - 1,
                dau_unit_axis=2)
        if self.mu2_initializer is None:
            self.mu2_initializer = DAUGridMean(
                dau_units=self.dau_units,
                max_value=np.floor(self.max_kernel_size[0] / 2.0) - 1,
                dau_unit_axis=1)

        if self.sigma_initializer is None:
            self.sigma_initializer = init_ops.constant_initializer(0.5)

        self.mu_learning_rate_factor = mu_learning_rate_factor

        self.unit_testing = unit_testing

        self.input_spec = base.InputSpec(ndim=self.rank + 2)

        self.num_dau_units_all = np.int32(np.prod(self.dau_units))
        self.num_dau_units_ignore = 0

        # if we have less than 2 units per channel or an odd number of them then add one more dummy unit
        # since computation is always done with 2 units at the same time (effectively set weight=0 for those dummy units)

        # make sure we have at least ALLOWED_UNITS_GROUP (this is requested so for fast version that can handle only factor of 2)
        if self.num_dau_units_all % self.DAU_UNITS_GROUP != 0:
            new_num_units = np.int32(
                np.ceil(self.num_dau_units_all / float(self.DAU_UNITS_GROUP)) *
                self.DAU_UNITS_GROUP)

            self.num_dau_units_ignore = new_num_units - self.num_dau_units_all

            # Grow the smaller dau_units dimension by the dummy-unit count.
            if self.dau_units[0] < self.dau_units[1]:
                self.dau_units = (self.dau_units[0] +
                                  self.num_dau_units_ignore, self.dau_units[1])
            else:
                self.dau_units = (self.dau_units[0], self.dau_units[1] +
                                  self.num_dau_units_ignore)

            self.num_dau_units_all = new_num_units

            # Dummy units must contribute nothing: zero their weights.
            self.weight_initializer = ZeroNLast(
                self.weight_initializer,
                last_num_to_zero=self.num_dau_units_ignore,
                axis=2)

        # Variables are assigned later; None until then.
        self.dau_weights = None
        self.dau_mu1 = None
        self.dau_mu2 = None
        self.dau_sigma = None