def __init__(self, params):
  super(DepthwiseConv2DLayer, self).__init__(params)
  p = self.params
  assert p.name
  w_pc = py_utils.WeightParams(
      shape=p.filter_shape,
      init=p.params_init,
      dtype=p.dtype,
      collections=[self.__class__.__name__ + '_vars'])

  with tf.variable_scope(p.name):
    self.CreateVariable('w', w_pc)
    if p.weight_norm:
      self.CreateVariable(
          'g',
          py_utils.WeightParams(
              shape=[p.filter_shape[2], p.filter_shape[3]],
              init=py_utils.WeightInit.Constant(0.0),
              dtype=p.dtype,
              collections=[self.__class__.__name__ + '_vars']))
    if p.bias:
      # NOTE(jiahuiyu): bias is subject to LP regularization in this version.
      self.CreateVariable(
          'b',
          py_utils.WeightParams(
              shape=[self.output_channels],
              init=py_utils.WeightInit.Constant(0.0),
              dtype=p.dtype,
              collections=[self.__class__.__name__ + '_vars']))
def __init__(self, params):
  super(BatchNormLayer, self).__init__(params)
  p = self.params
  assert p.name

  pc = py_utils.WeightParams(
      shape=[p.dim],
      init=py_utils.WeightInit.Constant(0.0),
      dtype=p.dtype,
      collections=[self.__class__.__name__ + '_vars'])

  with tf.variable_scope(p.name):
    if not p.use_moving_avg_in_training:
      self.CreateVariable('beta', pc)
      if p.gamma_zero_init:
        # Zero-initialize BN gamma.
        self.CreateVariable('gamma', pc)
      else:
        # Note: the real gamma to use is 1 + gamma.
        self.CreateVariable('gamma', pc, lambda x: 1.0 + x)

    # Two statistics.
    moving_collections = ['moving_vars', self.__class__.__name__ + '_vars']
    if p.add_stats_to_moving_average_variables:
      moving_collections += [tf.GraphKeys.MOVING_AVERAGE_VARIABLES]
    elif p.add_stats_to_moving_average_variables is None:
      # TODO(rpang): force all models to set this param explicitly.
      tf.logging.warning(
          'BatchNormLayer.add_stats_to_moving_average_variables should be '
          'set to True for new models, and to False explicitly for '
          'checkpoint compatibility.')
    # Add to the MOVING_AVERAGE_VARIABLES collection so that they are returned
    # by tf.moving_average_variables() and included in EMA variables if
    # ema_decay is enabled.
    mva = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(0.0),
        dtype=p.dtype,
        collections=moving_collections)
    self.CreateVariable(
        'moving_mean',
        mva,
        trainable=False,
        aggregation=tf.VariableAggregation.MEAN)

    mvv = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(1.0),
        dtype=p.dtype,
        collections=moving_collections)
    self.CreateVariable(
        'moving_variance',
        mvv,
        trainable=False,
        aggregation=tf.VariableAggregation.MEAN)
  self._epsilon = 0.001
  self._decay = p.decay
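# Usage sketch (illustrative only): how a layer like the one above is
# typically constructed via its Params. The import path and the concrete
# values below are assumptions for illustration, not taken from this file.
from lingvo.core import layers

p = layers.BatchNormLayer.Params()
p.name = 'bn'
p.dim = 128      # Channel dimension spanned by beta/gamma and the moving stats.
p.decay = 0.999  # EMA decay used for moving_mean / moving_variance.
bn_layer = p.Instantiate()
# bn_layer.vars now holds beta, gamma, moving_mean and moving_variance.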
def __init__(self, params):
  super(IdentityRegressionTask, self).__init__(params)
  with tf.variable_scope('IdentityRegressionTask'):
    self.CreateVariable(
        'm',
        py_utils.WeightParams(shape=[], init=py_utils.WeightInit.Uniform()))
    self.CreateVariable(
        'b',
        py_utils.WeightParams(shape=[], init=py_utils.WeightInit.Uniform()))
  self.global_steps = []
  self.metrics = []
  self.result_per_example_tensors = []
def CreateTensor(self, t_name):
  p = self.params
  assert t_name not in self._t_names, ('QTensor already registered: %s' %
                                       t_name)
  self._t_names.add(t_name)

  # Create accumulator.
  accumulator_name = self._GetAccumulatorNameForTensor(t_name)
  self.RegisterAccumulator(accumulator_name,
                           _CountedMinMaxAccumulator(p.dtype))
  # Register vars.
  min_pc = py_utils.WeightParams(
      (), py_utils.WeightInit.Constant(p.default_min), p.dtype)
  max_pc = py_utils.WeightParams(
      (), py_utils.WeightInit.Constant(p.default_max), p.dtype)
  self._CreateQStateVar(t_name, 'min', min_pc)
  self._CreateQStateVar(t_name, 'max', max_pc)
def __init__(self, name):
  self._name = name
  _, self._var = py_utils.CreateVariable(
      name=name,
      params=py_utils.WeightParams([], py_utils.WeightInit.Constant(0),
                                   tf.int64),
      trainable=False)
  self._value = self._var.value() + 0  # Makes a copy.
def __init__(self, params):
  super(BatchNormLayerNoPadding, self).__init__(params)
  p = self.params
  assert p.name, 'Name of BatchNormLayerNoPadding is not set.'
  p.fprop_dtype = None

  # Skip L-P regularization for these variables.
  collections = [
      self.__class__.__name__ + '_vars', py_utils.SKIP_LP_REGULARIZATION
  ]
  pc = py_utils.WeightParams(
      shape=[p.dim],
      init=py_utils.WeightInit.Constant(0.0),
      dtype=p.dtype,
      collections=collections)

  with tf.variable_scope(p.name):
    self.CreateVariable('beta', pc)
    # Note: the real gamma to use is 1 + gamma.
    self.CreateVariable('gamma', pc, lambda x: 1.0 + x)

    moving_collections = [
        'moving_vars', tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
        self.__class__.__name__ + '_vars'
    ]
    mva = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(0.0),
        dtype=p.dtype,
        collections=moving_collections)
    # Two statistics computed from sufficient stats.
    self.CreateVariable('moving_mean', mva, trainable=False)
    mvv = py_utils.WeightParams(
        shape=[p.dim],
        init=py_utils.WeightInit.Constant(1.0),
        dtype=p.dtype,
        collections=moving_collections)
    self.CreateVariable('moving_variance', mvv, trainable=False)

  # Accumulate bn sufficient stats over micro-batches.
  dim = self.vars.beta.shape[0]
  self.RegisterAccumulator('counts', AddingAccumulator([], p.dtype))
  self.RegisterAccumulator('mean_ss', AddingAccumulator([dim], p.dtype))
  self.RegisterAccumulator('variance_ss', AddingAccumulator([dim], p.dtype))
def __init__(self, params):
  super(BiasLayer, self).__init__(params)
  p = self.params
  with tf.variable_scope(p.name):
    self.CreateVariable(
        'b',
        py_utils.WeightParams(
            shape=[p.dims],
            init=py_utils.WeightInit.Constant(0.0),
            dtype=p.dtype,
            collections=[self.__class__.__name__ + '_vars']))
def __init__(self, params):
  super(LinearLayer, self).__init__(params)
  p = self.params
  with tf.variable_scope(p.name):
    self.CreateVariable(
        'w',
        py_utils.WeightParams(
            shape=[p.input_dims, p.output_dims],
            init=p.params_init,
            dtype=p.dtype,
            collections=[self.__class__.__name__ + '_vars']))
def _Acc(vg):
  """Updating accumulators."""
  v, g = vg
  with tf.variable_scope(v.op.name):
    _, a = py_utils.CreateVariable(
        'grad_accumulator',
        py_utils.WeightParams(v.get_shape(),
                              py_utils.WeightInit.Constant(0.0),
                              self.params.dtype),
        trainable=False)
    a = tf.assign_add(a, g)

  return py_utils.VarGrad(v, a)
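# A minimal sketch of the same gradient-accumulation pattern written with
# plain TF1 ops instead of the py_utils helpers; the function and variable
# names below are illustrative assumptions, not part of this codebase.
import tensorflow.compat.v1 as tf

def accumulate_grad(var, grad):
  """Adds `grad` into a persistent, non-trainable accumulator for `var`."""
  with tf.variable_scope(var.op.name):
    acc = tf.get_variable(
        'grad_accumulator',
        shape=var.get_shape(),
        initializer=tf.zeros_initializer(),
        trainable=False)
  # Running sum across micro-batches; the caller applies and then zeroes the
  # accumulator every N steps to emulate a larger effective batch size.
  return tf.assign_add(acc, grad)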
def __init__(self, params):
  super(DevBasedSchedule, self).__init__(params)
  p = self.params

  with tf.variable_scope(p.name):
    wp = py_utils.WeightParams(
        shape=[],
        init=py_utils.WeightInit.Constant(1.0),
        collections=['DevBasedSchedule_vars'],
        dtype=tf.float32)
    _, self._cur_factor = py_utils.CreateVariable(
        'cur_factor', wp, trainable=False)
    wp = py_utils.WeightParams(
        shape=[],
        init=py_utils.WeightInit.Constant(0),
        collections=['DevBasedSchedule_vars'],
        dtype=tf.int64)
    _, self._ref_step = py_utils.CreateVariable(
        'ref_step', wp, trainable=False)

    self._metric_history = early_stop.MetricHistory(p.metric_history)
    self._best_step = ops.best_step(self._metric_history.hist_file,
                                    p.tolerance)
def CreateVariable(self, name, var_params, theta_fn=None, **kwargs):
  """Create a variable of this layer according to the parameter `var_params`.

  E.g.::

      def __init__(self, ...):    # A layer's constructor
        self.CreateVariable(
            'weight', py_utils.WeightParams(shape=[100, 100]))

  `theta_fn` is used to apply a simple transformation on the created
  variable's value before it is used by the forward computation. E.g., to
  add the global variational noise according to this layer's parameter, one
  can do::

      def __init__(self, ...):    # A layer's constructor
        self.CreateVariable(
            name='weight',
            var_params=py_utils.WeightParams(shape=[100, 100]),
            theta_fn=self.AddGlobalVN)

  Args:
    name: Variable name which is used as the key into vars/theta.
    var_params: `Params` used to create the variable.
    theta_fn: A python function that takes a variable's value and returns a
      new value to be used later for computation. Its signature must be
      (tf.Tensor) -> (tf.Tensor).
    **kwargs: Keyword args passed to `.py_utils.CreateVariable`.
  """
  self._CheckName(name)
  if (self.params.skip_lp_regularization and
      py_utils.SKIP_LP_REGULARIZATION not in var_params.collections):
    var_params = py_utils.WeightParams(
        shape=var_params.shape,
        dtype=var_params.dtype,
        init=var_params.init,
        collections=(var_params.collections +
                     [py_utils.SKIP_LP_REGULARIZATION]))
  self._var_symbolic_shape_map[name] = var_params.shape
  value, var = py_utils.CreateVariable(
      name, var_params, default_seed=self.params.random_seed, **kwargs)
  self._private_vars[name] = var
  if theta_fn is not None:
    value = theta_fn(value)
  self._private_theta[name] = value
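# Illustrative sketch only: a minimal layer built on CreateVariable/theta_fn,
# following the constructor conventions of the layers above. The class name
# and its params (dim, dtype) are hypothetical and not from this codebase.
class ScaleLayer(base_layer.BaseLayer):
  """Scales its input by a learned per-channel factor."""

  def __init__(self, params):
    super(ScaleLayer, self).__init__(params)
    p = self.params
    with tf.variable_scope(p.name):
      # Store 1 + gamma in theta, mirroring the BatchNormLayer trick above,
      # so the layer starts out as an identity map.
      self.CreateVariable(
          'gamma',
          py_utils.WeightParams(
              shape=[p.dim],
              init=py_utils.WeightInit.Constant(0.0),
              dtype=p.dtype,
              collections=[self.__class__.__name__ + '_vars']),
          theta_fn=lambda x: 1.0 + x)

  def FProp(self, theta, inputs):
    return inputs * theta.gamma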
def __init__(self, params):
  super(SoftCondLayer, self).__init__(params)
  p = self.params
  assert p.name
  assert p.num_experts
  assert p.cond_dim

  with tf.variable_scope(p.name):
    # Create Variables for task weight mapping.
    collections = [
        self.__class__.__name__ + '_vars',
    ]
    w_p = py_utils.WeightParams(
        shape=[p.cond_dim, p.num_experts],
        init=p.params_init,  # TODO(huangyp): try zero init instead.
        dtype=p.dtype,
        collections=collections)
    self.CreateVariable('w', w_p)
    # Prepends p.num_experts to the tensor shape of every variable created
    # by p.body.
    with py_utils.VariableShapePrefixContext(p.num_experts):
      self.CreateChild('body', p.body)