def add_slot(self, var, slot_name, initializer="zeros"):
  """Add a new slot variable for `var`."""
  if slot_name not in self._slot_names:
    self._slot_names.append(slot_name)
  var_key = _var_key(var)
  slot_dict = self._slots.setdefault(var_key, {})
  weight = slot_dict.get(slot_name, None)
  if weight is None:
    if isinstance(initializer, six.string_types) or callable(initializer):
      initializer = initializers.get(initializer)
      initial_value = functools.partial(
          initializer, shape=var.shape, dtype=var.dtype)
    else:
      initial_value = initializer
    weight = tf_variables.Variable(
        name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
        dtype=var.dtype,
        trainable=False,
        initial_value=initial_value)
    backend.track_variable(weight)
    slot_dict[slot_name] = weight
    self._restore_slot_variable(
        slot_name=slot_name, variable=var, slot_variable=weight)
    self._weights.append(weight)
  return weight
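# --- Hedged usage sketch (not from the source) -------------------------------
# `add_slot` above is the hook a custom OptimizerV2 subclass calls from
# `_create_slots` to get per-variable state. The class and hyperparameter names
# below are illustrative assumptions, written against the TF 2.x OptimizerV2
# API (`tf.keras.optimizers.Optimizer` before TF 2.11).
import tensorflow as tf


class SketchMomentumSGD(tf.keras.optimizers.Optimizer):
  """Minimal SGD-with-momentum sketch built on `add_slot`/`get_slot`."""

  def __init__(self, learning_rate=0.01, momentum=0.9,
               name="SketchMomentumSGD", **kwargs):
    super(SketchMomentumSGD, self).__init__(name, **kwargs)
    self._set_hyper("learning_rate", learning_rate)
    self._set_hyper("momentum", momentum)

  def _create_slots(self, var_list):
    # One zero-initialized "momentum" slot per trainable variable.
    for var in var_list:
      self.add_slot(var, "momentum")

  def _resource_apply_dense(self, grad, var, apply_state=None):
    lr = tf.cast(self._get_hyper("learning_rate"), var.dtype)
    mu = tf.cast(self._get_hyper("momentum"), var.dtype)
    m = self.get_slot(var, "momentum")
    m.assign(mu * m - lr * grad)
    return var.assign_add(m)

  def get_config(self):
    config = super(SketchMomentumSGD, self).get_config()
    config.update({
        "learning_rate": self._serialize_hyperparameter("learning_rate"),
        "momentum": self._serialize_hyperparameter("momentum"),
    })
    return config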
def _add_weight(self, name, shape=(), dtype=None, initializer='zeros'):
  """Adds a weight to this loss scale.

  Args:
    name: Variable name.
    shape: Variable shape.
    dtype: The type of the variable.
    initializer: The initializer to use.

  Returns:
    A variable.
  """
  if isinstance(initializer, six.string_types) or callable(initializer):
    initializer = initializers.get(initializer)
  variable = self._add_variable_with_custom_getter(
      name=name,
      shape=shape,
      getter=base_layer_utils.make_variable,
      overwrite=True,
      initializer=initializer,
      dtype=dtype,
      trainable=False,
      use_resource=True,
      synchronization=variables.VariableSynchronization.AUTO,
      # Set aggregation to NONE, as loss scaling variables should never be
      # aggregated.
      aggregation=variables.VariableAggregation.NONE)
  backend.track_variable(variable)
  return variable
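# Hedged sketch (assumption, not from the source): `DynamicLossScale` relies on
# the `_add_weight` hook above to create its state, i.e. the current loss scale
# and the counter of consecutive finite-gradient steps, as non-trainable
# variables. The experimental symbol below exists in the TF 2.0-2.4 line.
import tensorflow as tf

loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
    initial_loss_scale=2 ** 15, increment_period=2000, multiplier=2.0)
print(float(loss_scale()))  # current scale, starts at 2 ** 15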
def opt_variable(value, dtype=None, name=None, constraint=None):
  """Instantiates a variable and returns it."""
  if dtype is None:
    dtype = backend.floatx()

  variables = []
  # `num_replicas` is expected to be bound in the enclosing scope.
  for i in range(num_replicas):
    # Keras holds the variables in optimizer class instance, so the name
    # does not matter here. ResourceVariable constructor will find a unique
    # name (including name=None) for each replica.
    with ops.device("device:TPU:{}".format(i)):
      v = resource_variable_ops.ResourceVariable(
          value,
          dtype=dtypes_module.as_dtype(dtype),
          name=name,
          constraint=constraint)
      variables.append(v)
  name = "replicate_{}_{}".format("variable" if name is None else name,
                                  ops.uid())
  v = ReplicatedVariable(name, variables)

  # pylint: disable=protected-access
  if isinstance(value, np.ndarray):
    v._keras_shape = value.shape
  elif hasattr(value, "shape"):
    v._keras_shape = backend.int_shape(value)
  v._uses_learning_phase = False
  backend.track_variable(v)
  return v
def _variable_creator(self, next_creator, **kwargs):
  name = kwargs['name']
  if name in self._variable_dict:
    return self._variable_dict[name]
  var = next_creator(**kwargs)
  self._variable_dict[name] = var
  if var.trainable:
    self._trainable_weights.append(var)
  else:
    self._non_trainable_weights.append(var)
  K.track_variable(var)
  return var
def add_weight(self,
               name,
               shape,
               dtype=None,
               initializer="zeros",
               trainable=None,
               synchronization=tf_variables.VariableSynchronization.AUTO,
               aggregation=tf_variables.VariableAggregation.NONE):
  if dtype is None:
    dtype = dtypes.float32
  if isinstance(initializer, six.string_types) or callable(initializer):
    initializer = initializers.get(initializer)

  if synchronization == tf_variables.VariableSynchronization.ON_READ:
    if trainable:
      raise ValueError(
          "Synchronization value can be set to "
          "VariableSynchronization.ON_READ only for non-trainable variables. "
          "You have specified trainable=True and "
          "synchronization=VariableSynchronization.ON_READ.")
    else:
      # Set trainable to be false when variable is to be synced on read.
      trainable = False
  elif trainable is None:
    trainable = True

  variable = self._add_variable_with_custom_getter(
      name=name,
      shape=shape,
      getter=base_layer_utils.make_variable,
      overwrite=True,
      initializer=initializer,
      dtype=dtype,
      trainable=trainable,
      use_resource=True,
      synchronization=synchronization,
      aggregation=aggregation)
  backend.track_variable(variable)
  return variable
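# Hedged usage sketch (not from the source): `add_weight` is how the optimizer
# creates its own non-slot state, e.g. the `iterations` counter. Calling it on
# an optimizer instance yields a non-trainable, backend-tracked variable owned
# by that optimizer; the variable name below is an illustrative assumption.
import tensorflow as tf

opt = tf.keras.optimizers.SGD(0.1)
counter = opt.add_weight("sketch_counter", shape=[], dtype=tf.int64,
                         trainable=False)
counter.assign_add(1)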
def __init__(self, optimizer, loss_scale):
  """Initializes this loss scale optimizer.

  Args:
    optimizer: The Optimizer instance to wrap.
    loss_scale: The loss scale to scale the loss and gradients. This can
      either be an int/float to use a fixed loss scale, the string "dynamic"
      to use dynamic loss scaling, or an instance of a LossScale. The string
      "dynamic" is equivalent to passing `DynamicLossScale()`, and passing an
      int/float is equivalent to passing a FixedLossScale with the given loss
      scale.
  """
  if not isinstance(optimizer, optimizer_v2.OptimizerV2):
    raise ValueError('"optimizer" must be an instance of OptimizerV2, but '
                     'got: %s' % optimizer)
  self._raise_if_strategy_unsupported()

  self._optimizer = optimizer
  self._loss_scale = keras_loss_scale_module.get(loss_scale)
  if self._loss_scale is None:
    raise ValueError('loss_scale cannot be None.')

  # We don't call super().__init__, since we do not want to call OptimizerV2's
  # constructor.
  _DelegatingTrackableMixin.__init__(self, self._optimizer)

  for weight in self._loss_scale._weights.values():  # pylint: disable=protected-access
    # We cannot call `track_variable` in the LossScale class itself, because a
    # file outside of Keras cannot depend on a Keras file. Calling it here
    # instead is OK, because a variable only needs to be tracked if used with
    # a Keras class, and the only way to use LossScale with a Keras class is
    # through the LossScaleOptimizer.
    backend.track_variable(weight)
  self._track_trackable(self._loss_scale, 'loss_scale')

  # To support restoring TensorFlow 2.2 checkpoints.
  self._track_trackable(FakeOptimizerForRestoration(self._optimizer),
                        'base_optimizer')
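# Hedged usage sketch (not from the source): the public entry point for this
# wrapper in the TF 2.0-2.3 line is the experimental symbol below; later
# releases expose `tf.keras.mixed_precision.LossScaleOptimizer` with a slightly
# different constructor.
import tensorflow as tf

base_opt = tf.keras.optimizers.SGD(learning_rate=0.01)
opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    base_opt, loss_scale="dynamic")
# `opt` can now be passed to `model.compile(optimizer=opt, ...)`; the loss
# scale's own variables are tracked via `backend.track_variable` as shown above.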
def add_slot(var, slot_name, initializer="zeros"):
  """Add a new slot variable for `var`."""
  # Note: `self` (the wrapped optimizer) and the `de` (dynamic embedding)
  # module are expected to be bound in the enclosing scope; this function is
  # patched onto the optimizer as a closure rather than defined as a method.
  if slot_name not in self._slot_names:
    self._slot_names.append(slot_name)
  var_key = optimizer_v2._var_key(var)  # pylint: disable=protected-access
  slot_dict = self._slots.setdefault(var_key, {})
  weight = slot_dict.get(slot_name, None)
  if weight is None:
    if isinstance(initializer, six.string_types) or callable(initializer):
      initializer = initializers.get(initializer)
      initial_value = functools.partial(initializer,
                                        shape=var.shape,
                                        dtype=var.dtype)
    else:
      initial_value = initializer
    strategy = distribute_ctx.get_strategy()
    with strategy.extended.colocate_vars_with(var):
      if isinstance(var, de.TrainableWrapper):
        weight = de.create_slots(var, initial_value, slot_name,
                                 var._shared_name)  # pylint: disable=protected-access
      else:
        weight = variables.Variable(
            name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
            dtype=var.dtype,
            trainable=False,
            initial_value=initial_value,
        )
    backend.track_variable(weight)
    slot_dict[slot_name] = weight
    self._restore_slot_variable(slot_name=slot_name,
                                variable=var,
                                slot_variable=weight)
    self._weights.append(weight)
  return weight
def __init__(self, opt, loss_scale):
  """Initializes this loss scale optimizer.

  Args:
    opt: The Optimizer instance to wrap.
    loss_scale: The loss scale to scale the loss and gradients. This can
      either be an int/float to use a fixed loss scale, the string "dynamic"
      to use dynamic loss scaling, or an instance of a LossScale. The string
      "dynamic" is equivalent to passing `DynamicLossScale()`, and passing an
      int/float is equivalent to passing a FixedLossScale with the given loss
      scale.
  """
  if not isinstance(opt, optimizer_v2.OptimizerV2):
    raise ValueError(
        '"opt" must be an instance of OptimizerV2, but got: %s' % opt)
  if hasattr(opt, 'clipnorm'):
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipnorm. Optimizer %s has clipnorm '
                     '%s' % (opt, opt.clipnorm))
  if hasattr(opt, 'clipvalue'):
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipvalue. Optimizer %s has '
                     'clipvalue %s' % (opt, opt.clipvalue))

  self._optimizer = opt
  self._loss_scale = loss_scale_module.get(loss_scale)
  for weight in loss_scale_module.get_loss_scale_weights(self._loss_scale):
    # We cannot call `track_variable` in the LossScale class itself, because a
    # file outside of Keras cannot depend on a Keras file. Calling it here
    # instead is OK, because a variable only needs to be tracked if used with
    # a Keras class, and the only way to use LossScale with a Keras class is
    # through the LossScaleOptimizer.
    backend.track_variable(weight)
  self._track_trackable(self._optimizer, 'base_optimizer')
  self._track_trackable(self._loss_scale, 'loss_scale')
def _clone_and_build_model(mode,
                           keras_model,
                           custom_objects,
                           features=None,
                           labels=None):
  """Clone and build the given keras_model.

  Args:
    mode: training mode.
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    features: Dict of tensors.
    labels: Dict of tensors, or single tensor instance.

  Returns:
    The newly built model.
  """
  # Set to True during training, False for inference or testing.
  K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)
  input_tensors, target_tensors = _convert_estimator_io_to_keras(
      keras_model, features, labels)

  compile_clone = (mode != model_fn_lib.ModeKeys.PREDICT)

  global_step = None
  if compile_clone:
    # Set iterations to the global step created by tf.train.create_global_step()
    # which is automatically run in the estimator framework.
    global_step = training_util.get_or_create_global_step()
    K.track_variable(global_step)

  clone = models.clone_and_build_model(
      keras_model, input_tensors, target_tensors, custom_objects,
      compile_clone=compile_clone,
      in_place_reset=(not keras_model._is_graph_network),
      optimizer_iterations=global_step)

  return clone
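# Hedged usage sketch (not from the source): this helper is invoked internally
# when a compiled Keras model is converted to an Estimator, which rebuilds the
# model per mode (train/eval/predict) inside the Estimator's own graph.
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="sgd", loss="mse")
estimator = tf.keras.estimator.model_to_estimator(keras_model=model)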
def _variable_creator(self, next_creator, **kwargs):
  name = kwargs['name']

  # Variable named "name" already created in this invocation of `call`.
  if name in self._variables_added_in_call:
    raise RuntimeError('`Variable`s in a `Lambda` layer must have unique '
                       'names, found duplicate name: {}'.format(name))
  self._variables_added_in_call.add(name)

  # Reuse Variables across invocations of `call`.
  if name in self._variable_dict:
    return self._variable_dict[name]

  # Variable was never created before.
  var = next_creator(**kwargs)
  self._variable_dict[name] = var
  if var.trainable:
    self._trainable_weights.append(var)
  else:
    self._non_trainable_weights.append(var)
  K.track_variable(var)
  return var
def add_slot(self, var, slot_name, initializer="zeros"):
  """Add a new slot variable for `var`."""
  if slot_name not in self._slot_names:
    self._slot_names.append(slot_name)
  var_key = _var_key(var)
  slot_dict = self._slots.setdefault(var_key, {})
  weight = slot_dict.get(slot_name, None)
  if weight is None:
    if isinstance(initializer, six.string_types) or callable(initializer):
      initializer = initializers.get(initializer)
      initial_value = functools.partial(
          initializer, shape=var.shape, dtype=var.dtype)
    else:
      initial_value = initializer

    strategy = distribute_ctx.get_strategy()
    if not strategy.extended.variable_created_in_scope(var):
      raise ValueError(
          "Trying to create optimizer slot variable under the scope for "
          "tf.distribute.Strategy ({}), which is different from the scope "
          "used for the original variable ({}). Make sure the slot "
          "variables are created under the same strategy scope. This may "
          "happen if you're restoring from a checkpoint outside the scope"
          .format(strategy, var))

    with strategy.extended.colocate_vars_with(var):
      weight = tf_variables.Variable(
          name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
          dtype=var.dtype,
          trainable=False,
          initial_value=initial_value)
    backend.track_variable(weight)
    slot_dict[slot_name] = weight
    self._restore_slot_variable(
        slot_name=slot_name, variable=var, slot_variable=weight)
    self._weights.append(weight)
  return weight
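# Hedged sketch (not from the source): the strategy check above is why optimizer
# checkpoints should be restored under the same tf.distribute scope that created
# the model variables; otherwise slot creation raises the ValueError shown. The
# checkpoint directory below is a placeholder assumption.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
  opt = tf.keras.optimizers.Adam()
  ckpt = tf.train.Checkpoint(model=model, optimizer=opt)
  # Restoring inside the scope keeps slot variables colocated with their `var`.
  ckpt.restore(tf.train.latest_checkpoint("/tmp/sketch_ckpt_dir"))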
def _handle_fp16_and_distributed_optimizer(optimizer, lr_schedule, hvd_backend=None):
  if hvd_backend == "horovod":
    import horovod.tensorflow.keras as hvd
    from horovod.tensorflow import Compression
  elif hvd_backend == "byteps":
    import byteps.tensorflow.keras as hvd
    from byteps.tensorflow import Compression

  if hvd_backend:
    compression = Compression.none
    if compat.CUSTOM_GLOBAL_FLOATX == "float16":
      compression = Compression.fp16

  if lr_schedule is not None and hvd_backend is None:
    # TODO(ZhaoChengqi): pay attention to API changes
    optimizer._set_hyper("learning_rate", lr_schedule)

  # Handle the fp16 scenario specially: there is a bug under
  # TF2.3 + Horovod + fp16 + XLA.
  if compat.CUSTOM_GLOBAL_FLOATX == "float16":
    logging.info("NOTICE: using revised DynamicLossScale under fp16")
    revised_loss_scale = training_utils.RevisedDynamicLossScale()
    if hvd_backend:
      opt = LossScaleOptimizer(optimizer, loss_scale=1)
      opt = hvd.DistributedOptimizer(opt, compression=compression,
                                     sparse_as_dense=True)
      opt._loss_scale = revised_loss_scale
      for weight in loss_scale_module.get_loss_scale_weights(opt._loss_scale):
        backend.track_variable(weight)
      opt._track_trackable(opt._loss_scale, 'loss_scale', overwrite=True)
    else:
      opt = LossScaleOptimizer(optimizer, loss_scale=revised_loss_scale)
    return opt
  return optimizer
def clone_and_build_model(
    model, input_tensors=None, target_tensors=None, custom_objects=None,
    compile_clone=True, in_place_reset=False):
  """Clone a `Model` and build/compile it with the same settings used before.

  This function should be run in the same graph as the model.

  Args:
    model: `tf.keras.Model` object. Can be Functional, Sequential, or
      sub-classed.
    input_tensors: Optional list of input tensors to build the model upon. If
      not provided, placeholders will be created.
    target_tensors: Optional list of target tensors for compiling the model. If
      not provided, placeholders will be created.
    custom_objects: Optional dictionary mapping string names to custom classes
      or functions.
    compile_clone: Boolean, whether to compile model clone (default `True`).
    in_place_reset: Boolean, whether to reset the model in place. Only used if
      the model is not a graph network. If the model is a subclassed model, then
      this argument must be set to `True` (default `False`). To restore the
      original model, use the function
      `in_place_subclassed_model_state_restoration(model)`.

  Returns:
    Clone of the model.

  Raises:
    ValueError: if trying to clone a subclassed model, and `in_place_reset` is
      set to False.
  """
  if model._is_graph_network:
    if custom_objects:
      with CustomObjectScope(custom_objects):
        clone = clone_model(model, input_tensors=input_tensors)
    else:
      clone = clone_model(model, input_tensors=input_tensors)
  else:
    if not in_place_reset:
      raise ValueError(
          'Model is not a graph network (usually means that it is a subclassed '
          'model). The model cannot be cloned, but there is a workaround where '
          'the model is reset in-place. To use this, please set the argument '
          '`in_place_reset` to `True`. This will reset the attributes in the '
          'original model. To restore the attributes, call '
          '`in_place_subclassed_model_state_restoration(model)`.')
    clone = model
    _in_place_subclassed_model_reset(clone)
    if input_tensors is not None:
      clone._set_inputs(input_tensors)

  # Compile/Build model
  if not compile_clone:
    if isinstance(clone, Sequential):
      clone.build()
  elif model.optimizer:
    if isinstance(model.optimizer, optimizers.TFOptimizer):
      optimizer = model.optimizer
      K.track_tf_optimizer(optimizer)
    else:
      optimizer_config = model.optimizer.get_config()
      optimizer = model.optimizer.__class__.from_config(optimizer_config)
      global_step = training_util.get_or_create_global_step()
      K.track_variable(global_step)
      optimizer.iterations = global_step

    clone.compile(
        optimizer,
        model.loss,
        metrics=model.metrics,
        loss_weights=model.loss_weights,
        sample_weight_mode=model.sample_weight_mode,
        weighted_metrics=model.weighted_metrics,
        target_tensors=target_tensors)

  return clone
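# Hedged usage sketch (not from the source): for graph-network models the public
# API closest to `clone_and_build_model` is `tf.keras.models.clone_model`
# followed by an explicit re-compile; the internal helper additionally wires the
# optimizer's `iterations` to the global step for Estimator use.
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="sgd", loss="mse")

clone = tf.keras.models.clone_model(model)  # fresh, uncompiled copy
clone.compile(optimizer="sgd", loss="mse")  # re-compile with the same settings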