Example 1
    def __init__(self, params):
        super(DepthwiseConv2DLayer, self).__init__(params)
        p = self.params
        assert p.name
        w_pc = py_utils.WeightParams(
            shape=p.filter_shape,
            init=p.params_init,
            dtype=p.dtype,
            collections=[self.__class__.__name__ + '_vars'])

        with tf.variable_scope(p.name):
            self.CreateVariable('w', w_pc)
            if p.weight_norm:
                self.CreateVariable(
                    'g',
                    py_utils.WeightParams(
                        shape=[p.filter_shape[2], p.filter_shape[3]],
                        init=py_utils.WeightInit.Constant(0.0),
                        dtype=p.dtype,
                        collections=[self.__class__.__name__ + '_vars']))
            if p.bias:
                # NOTE(jiahuiyu): bias is subject to LP regularization in this version.
                self.CreateVariable(
                    'b',
                    py_utils.WeightParams(
                        shape=[self.output_channels],
                        init=py_utils.WeightInit.Constant(0.0),
                        dtype=p.dtype,
                        collections=[self.__class__.__name__ + '_vars']))
Example 2
  def __init__(self, params):
    super(BatchNormLayer, self).__init__(params)
    p = self.params
    assert p.name

    pc = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(0.0),
        dtype=p.dtype,
        collections=[self.__class__.__name__ + '_vars'])

    with tf.variable_scope(p.name):
      if not p.use_moving_avg_in_training:
        self.CreateVariable('beta', pc)
        if p.gamma_zero_init:
          # Zero initialization for BN gamma.
          self.CreateVariable('gamma', pc)
        else:
          # Note: the real gamma to use is 1 + gamma.
          self.CreateVariable('gamma', pc, lambda x: 1.0 + x)

      # Two statistics.
      moving_collections = ['moving_vars', self.__class__.__name__ + '_vars']
      if p.add_stats_to_moving_average_variables:
        moving_collections += [tf.GraphKeys.MOVING_AVERAGE_VARIABLES]
      elif p.add_stats_to_moving_average_variables is None:
        # TODO(rpang): force all models to set this param explicitly.
        tf.logging.warning(
            'BatchNormLayer.add_stats_to_moving_average_variables should be '
            'set to True for new models, and to False explicitly for '
            'checkpoint compatibility.')
      # Add to the MOVING_AVERAGE_VARIABLES collection so that they are returned
      # by tf.moving_average_variables() and included in EMA variables if
      # ema_decay is enabled.
      mva = py_utils.WeightParams(
          shape=[p.dim],
          init=py_utils.WeightInit.Constant(0.0),
          dtype=p.dtype,
          collections=moving_collections)
      self.CreateVariable(
          'moving_mean',
          mva,
          trainable=False,
          aggregation=tf.VariableAggregation.MEAN)

      mvv = py_utils.WeightParams(
          shape=[p.dim],
          init=py_utils.WeightInit.Constant(1.0),
          dtype=p.dtype,
          collections=moving_collections)
      self.CreateVariable(
          'moving_variance',
          mvv,
          trainable=False,
          aggregation=tf.VariableAggregation.MEAN)
    self._epsilon = 0.001
    self._decay = p.decay
Example 3
  def __init__(self, params):
    super(IdentityRegressionTask, self).__init__(params)
    with tf.variable_scope('IdentityRegressionTask'):
      self.CreateVariable(
          'm',
          py_utils.WeightParams(shape=[], init=py_utils.WeightInit.Uniform()))
      self.CreateVariable(
          'b',
          py_utils.WeightParams(shape=[], init=py_utils.WeightInit.Uniform()))
    self.global_steps = []
    self.metrics = []
    self.result_per_example_tensors = []
Example 4
    def CreateTensor(self, t_name):
        p = self.params
        assert t_name not in self._t_names, ('QTensor already registered: %s' %
                                             t_name)
        self._t_names.add(t_name)

        # Create accumulator
        accumulator_name = self._GetAccumulatorNameForTensor(t_name)
        self.RegisterAccumulator(accumulator_name,
                                 _CountedMinMaxAccumulator(p.dtype))
        # Register vars.
        min_pc = py_utils.WeightParams(
            (), py_utils.WeightInit.Constant(p.default_min), p.dtype)
        max_pc = py_utils.WeightParams(
            (), py_utils.WeightInit.Constant(p.default_max), p.dtype)
        self._CreateQStateVar(t_name, 'min', min_pc)
        self._CreateQStateVar(t_name, 'max', max_pc)
Example 5
  def __init__(self, name):
    self._name = name
    _, self._var = py_utils.CreateVariable(
        name=name,
        params=py_utils.WeightParams([], py_utils.WeightInit.Constant(0),
                                     tf.int64),
        trainable=False)
    self._value = self._var.value() + 0  # Makes a copy.
Example 6
  def __init__(self, params):
    super(BatchNormLayerNoPadding, self).__init__(params)
    p = self.params
    assert p.name, 'Name of BatchNormLayerNoPadding is not set.'
    p.fprop_dtype = None

    # Skip L-P regularization for these variables.
    collections = [
        self.__class__.__name__ + '_vars', py_utils.SKIP_LP_REGULARIZATION
    ]
    pc = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(0.0),
        dtype=p.dtype,
        collections=collections)

    with tf.variable_scope(p.name):
      self.CreateVariable('beta', pc)
      # Note: the real gamma to use is 1 + gamma.
      self.CreateVariable('gamma', pc, lambda x: 1.0 + x)

      moving_collections = [
          'moving_vars', tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
          self.__class__.__name__ + '_vars'
      ]
      mva = py_utils.WeightParams(
          shape=[p.dim],
          init=py_utils.WeightInit.Constant(0.0),
          dtype=p.dtype,
          collections=moving_collections)
      # Two statistics computed from sufficient stats.
      self.CreateVariable('moving_mean', mva, trainable=False)
      mvv = py_utils.WeightParams(
          shape=[p.dim],
          init=py_utils.WeightInit.Constant(1.0),
          dtype=p.dtype,
          collections=moving_collections)
      self.CreateVariable('moving_variance', mvv, trainable=False)

    # Accumulate bn sufficient stats over micro-batches.
    dim = self.vars.beta.shape[0]
    self.RegisterAccumulator('counts', AddingAccumulator([], p.dtype))
    self.RegisterAccumulator('mean_ss', AddingAccumulator([dim], p.dtype))
    self.RegisterAccumulator('variance_ss', AddingAccumulator([dim], p.dtype))
Example 7
  def __init__(self, params):
    super(BiasLayer, self).__init__(params)
    p = self.params
    with tf.variable_scope(p.name):
      self.CreateVariable(
          'b',
          py_utils.WeightParams(
              shape=[p.dims],
              init=py_utils.WeightInit.Constant(0.0),
              dtype=p.dtype,
              collections=[self.__class__.__name__ + '_vars']))
Example 8
  def __init__(self, params):
    super(LinearLayer, self).__init__(params)
    p = self.params
    with tf.variable_scope(p.name):
      self.CreateVariable(
          'w',
          py_utils.WeightParams(
              shape=[p.input_dims, p.output_dims],
              init=p.params_init,
              dtype=p.dtype,
              collections=[self.__class__.__name__ + '_vars']))
Example 9
        def _Acc(vg):
            """Updating accumulators."""

            v, g = vg
            with tf.variable_scope(v.op.name):
                _, a = py_utils.CreateVariable(
                    'grad_accumulator',
                    py_utils.WeightParams(v.get_shape(),
                                          py_utils.WeightInit.Constant(0.0),
                                          self.params.dtype),
                    trainable=False)
                a = tf.assign_add(a, g)

            return py_utils.VarGrad(v, a)
Example 10
    def __init__(self, params):
        super(DevBasedSchedule, self).__init__(params)

        p = self.params

        with tf.variable_scope(p.name):
            wp = py_utils.WeightParams(shape=[],
                                       init=py_utils.WeightInit.Constant(1.0),
                                       collections=['DevBasedSchedule_vars'],
                                       dtype=tf.float32)
            _, self._cur_factor, = py_utils.CreateVariable('cur_factor',
                                                           wp,
                                                           trainable=False)
            wp = py_utils.WeightParams(shape=[],
                                       init=py_utils.WeightInit.Constant(0),
                                       collections=['DevBasedSchedule_vars'],
                                       dtype=tf.int64)
            _, self._ref_step, = py_utils.CreateVariable('ref_step',
                                                         wp,
                                                         trainable=False)

            self._metric_history = early_stop.MetricHistory(p.metric_history)
            self._best_step = ops.best_step(self._metric_history.hist_file,
                                            p.tolerance)
Example 11
  def CreateVariable(self, name, var_params, theta_fn=None, **kwargs):
    """Create a variable of this layer according to the parameter `var_params`.

    E.g.::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
              'weight', py_utils.WeightParams(shape=[100, 100]))

    `theta_fn` is used to apply a simple transformation on the created
    variable's value before used by the forward computation. E.g., to
    add the global variational noise according to this layer's
    parameter, one can do::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
            name='weight',
            var_params=py_utils.WeightParams(shape=[100, 100]),
            theta_fn=self.AddGlobalVN)

    Args:
      name: Variable name which is used as the key into vars/theta.
      var_params: `Params` used to create the variable.
      theta_fn: A python function that takes a variable's value and returns a
        new value to be used later for computation. Its signature must be
        (tf.Tensor) -> (tf.Tensor).
      **kwargs: Keyword args passed to `.py_utils.CreateVariable`.
    """
    self._CheckName(name)
    if (self.params.skip_lp_regularization and
        py_utils.SKIP_LP_REGULARIZATION not in var_params.collections):
      var_params = py_utils.WeightParams(
          shape=var_params.shape,
          dtype=var_params.dtype,
          init=var_params.init,
          collections=(var_params.collections +
                       [py_utils.SKIP_LP_REGULARIZATION]))
    self._var_symbolic_shape_map[name] = var_params.shape
    value, var = py_utils.CreateVariable(
        name, var_params, default_seed=self.params.random_seed, **kwargs)
    self._private_vars[name] = var
    if theta_fn is not None:
      value = theta_fn(value)
    self._private_theta[name] = value
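As a brief illustration of the theta_fn argument documented above, here is a minimal sketch (the layer name ExampleGammaLayer and the p.dim hyperparameter are placeholders, not taken from the examples) in which the variable is stored as a zero-initialized offset while the forward computation sees 1 + offset, mirroring the gamma handling in Examples 2 and 6:

  def __init__(self, params):
    super(ExampleGammaLayer, self).__init__(params)
    p = self.params
    pc = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(0.0),
        dtype=p.dtype,
        collections=[self.__class__.__name__ + '_vars'])
    with tf.variable_scope(p.name):
      # Hypothetical usage: the stored variable is a zero-centered offset;
      # the value exposed through theta is the transformed 1 + offset.
      self.CreateVariable('gamma', pc, theta_fn=lambda x: 1.0 + x)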
Example 12
  def __init__(self, params):
    super(SoftCondLayer, self).__init__(params)
    p = self.params
    assert p.name
    assert p.num_experts
    assert p.cond_dim
    with tf.variable_scope(p.name):
      # Create Variables for task weight mapping.
      collections = [
          self.__class__.__name__ + '_vars',
      ]
      w_p = py_utils.WeightParams(
          shape=[p.cond_dim, p.num_experts],
          init=p.params_init,  # TODO(huangyp): try zero init instead.
          dtype=p.dtype,
          collections=collections)
      self.CreateVariable('w', w_p)
      # Prepends p.num_experts to the tensor shape of every variable created
      # by p.body.
      with py_utils.VariableShapePrefixContext(p.num_experts):
        self.CreateChild('body', p.body)