def _create_accumulators(self, block, parameters):
    assert isinstance(block, framework.Block)
    main_block = block.program.global_block()
    # Create beta1 and beta2 power tensors
    beta_shape = [1]
    self._beta1_pow_acc = self.helper.create_global_variable(
        name=unique_name('beta1_pow_acc'),
        dtype='float32',
        shape=beta_shape,
        lod_level=0,
        persistable=True)
    self.helper.set_variable_initializer(
        self._beta1_pow_acc, initializer=Constant(self._beta1))

    self._beta2_pow_acc = self.helper.create_global_variable(
        name=unique_name('beta2_pow_acc'),
        dtype='float32',
        shape=beta_shape,
        lod_level=0,
        persistable=True)
    self.helper.set_variable_initializer(
        self._beta2_pow_acc, initializer=Constant(self._beta2))

    # Create accumulator tensors for first and second moments
    for p in parameters:
        self._add_accumulator(self._moment1_acc_str, p)
        self._add_accumulator(self._moment2_acc_str, p)
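# For context, a minimal scalar sketch of the textbook Adam update (Kingma & Ba, 2015)
# that the accumulators above support. beta1_pow_acc / beta2_pow_acc are initialized to
# beta1 / beta2 above, i.e. beta**t at step t = 1, and provide the bias correction.
# This is the standard formulation, not necessarily the exact arithmetic of the
# underlying adam op; all names and default values below are illustrative.
def adam_step(param, grad, moment1, moment2, beta1_pow, beta2_pow,
              lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Running estimates of the first and second moments of the gradient.
    moment1 = beta1 * moment1 + (1.0 - beta1) * grad
    moment2 = beta2 * moment2 + (1.0 - beta2) * grad * grad
    # Bias-corrected moments, using the beta power accumulators.
    moment1_hat = moment1 / (1.0 - beta1_pow)
    moment2_hat = moment2 / (1.0 - beta2_pow)
    param = param - lr * moment1_hat / (moment2_hat ** 0.5 + epsilon)
    # The power accumulators themselves advance by one factor per step.
    return param, moment1, moment2, beta1_pow * beta1, beta2_pow * beta2

# Illustrative first step on scalars:
# p, m1, m2, b1p, b2p = adam_step(0.5, 0.1, 0.0, 0.0, 0.9, 0.999)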
def _add_accumulator(self, name, param, dtype=None, fill_value=0.0):
    """Utility function to add an accumulator for a parameter

    Args:
        name: name of the accumulator
        param: parameter variable for which accumulator is to be added
        dtype: data type of the accumulator variable
        fill_value: value to initialize the accumulator variable
    """
    if (name in self._accumulators and
            param.name in self._accumulators[name]):
        raise Exception("Accumulator {} already exists for parameter {}".
                        format(name, param.name))

    assert isinstance(self.helper, LayerHelper)
    var = self.helper.create_global_variable(
        name=unique_name(name),
        persistable=True,
        dtype=dtype or param.dtype,
        type=param.type,
        shape=param.shape)
    self.helper.set_variable_initializer(
        var, initializer=Constant(value=float(fill_value)))
    self._accumulators[name][param.name] = var
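# A minimal, framework-free sketch of the bookkeeping pattern _add_accumulator
# implements: a per-accumulator-name dict keyed by parameter name, with a duplicate
# check. Only the dict-of-dicts layout and the duplicate check are taken from the
# code above; the registry class, the float values, and the parameter names are
# illustrative (real accumulators are persistable global variables, not floats).
from collections import defaultdict


class AccumulatorRegistry:
    def __init__(self):
        self._accumulators = defaultdict(dict)

    def add(self, name, param_name, fill_value=0.0):
        if param_name in self._accumulators[name]:
            raise Exception("Accumulator {} already exists for parameter {}".
                            format(name, param_name))
        self._accumulators[name][param_name] = fill_value

    def get(self, name, param_name):
        return self._accumulators[name][param_name]


# registry = AccumulatorRegistry()
# registry.add("moment1", "fc_0.w_0")   # parameter name is illustrative
# registry.add("moment2", "fc_0.w_0")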
def _create_param_lr(self, param_and_grad):
    # create learning rate variable for every parameter
    param = param_and_grad[0]
    param_lr = param.optimize_attr['learning_rate']
    param_lr_shape = [1]
    param_lr_var = self.helper.create_global_variable(
        name=unique_name("learning_rate"),
        dtype='float32',
        shape=param_lr_shape,
        lod_level=1,
        persistable=True)
    param_lr = param_lr * self._learning_rate
    self.helper.set_variable_initializer(
        var=param_lr_var, initializer=Constant(param_lr))
    return param_lr_var
def _create_accumulators(self, block, parameters):
    # Create beta1 power accumulator tensor
    beta_shape = [1]
    self._beta1_pow_acc = self.helper.create_global_variable(
        name=unique_name('beta1_pow_acc'),
        dtype='float32',
        shape=beta_shape,
        lod_level=0,
        persistable=True)
    self.helper.set_variable_initializer(
        self._beta1_pow_acc, initializer=Constant(self._beta1))

    # Create accumulator tensors for first moment and infinity norm
    for p in parameters:
        self._add_accumulator(self._moment_acc_str, p)
        self._add_accumulator(self._inf_norm_acc_str, p)
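# For context, a minimal scalar sketch of the textbook Adamax update (Kingma & Ba, 2015)
# that the first-moment and infinity-norm accumulators above support. Epsilon placement
# varies between implementations, so this is not necessarily the exact arithmetic of the
# underlying adamax op; names and defaults are illustrative.
def adamax_step(param, grad, moment, inf_norm, beta1_pow,
                lr=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Running first moment and exponentially weighted infinity norm of the gradient.
    moment = beta1 * moment + (1.0 - beta1) * grad
    inf_norm = max(beta2 * inf_norm, abs(grad))
    # beta1_pow holds beta1**t and bias-corrects the step size.
    param = param - (lr / (1.0 - beta1_pow)) * moment / (inf_norm + epsilon)
    return param, moment, inf_norm, beta1_pow * beta1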
def create_state(self, suffix, dtype, shape):
    """
    Create state variable.

    NOTE: It is not a public API.

    Args:
        suffix(str): the state suffix.
        dtype(str|core.DataType): the state data type
        shape(tuple|list): the shape of state

    Returns: State variable
    """
    state = self.helper.create_variable(
        name="_".join([unique_name(self.helper.name), suffix]),
        persistable=True,
        dtype=dtype,
        shape=shape)
    self.states.append(state)
    return state
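# A minimal, framework-free sketch of the pattern create_state supports: an evaluator
# keeps persistable running states across mini-batches and derives its metric from
# them, resetting between passes. The accuracy-style metric below is purely
# illustrative; real states are persistable graph variables updated by ops, not
# Python floats.
class RunningAccuracy:
    def __init__(self):
        self.correct = 0.0   # analogous to a persistable "correct" state
        self.total = 0.0     # analogous to a persistable "total" state

    def update(self, num_correct, num_samples):
        self.correct += num_correct
        self.total += num_samples

    def eval(self):
        return self.correct / self.total if self.total else 0.0

    def reset(self):
        self.correct = 0.0
        self.total = 0.0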
def create_parameter(self,
                     attr,
                     shape,
                     dtype,
                     is_bias=False,
                     default_initializer=None):
    # Deepcopy the attr so that parameters can be shared in program
    assert isinstance(attr, ParamAttr)
    suffix = 'b' if is_bias else 'w'

    if default_initializer is None:
        if is_bias:
            attr.set_default_bias_initializer()
        else:
            attr.set_default_param_initializer()
    else:
        attr.set_default_initializer(default_initializer)
    if attr.name is None:
        attr.name = unique_name(".".join([self.name, suffix]))

    self.startup_program.global_block().create_parameter(
        dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
    return self.main_program.global_block().create_parameter(
        dtype=dtype, shape=shape, **attr.to_kwargs())
def __init__(self, layer_type, **kwargs):
    self.kwargs = kwargs
    self.layer_type = layer_type
    name = self.kwargs.get('name', None)
    if name is None:
        self.kwargs['name'] = unique_name(self.layer_type)
def create_tmp_variable(self, dtype):
    return self.main_program.current_block().create_var(
        name=unique_name(".".join([self.name, 'tmp'])),
        dtype=dtype,
        persistable=False)