def _assert_in_default_state(t): t.assertIs(distribute._default_tower_context, distribute.get_tower_context()) t.assertIs(None, distribute.get_cross_tower_context()) t.assertIs(distribute._default_distribution_strategy, distribute.get_distribution_strategy()) t.assertFalse(distribute.has_distribution_strategy())
def merge_fn(dist, s): self.assertIs(distribute._default_distribution_strategy, dist) self.assertIs(None, distribute.get_tower_context()) self.assertIs(dist, distribute.get_cross_tower_context()) self.assertIs(dist, distribute.get_distribution_strategy()) self.assertFalse(distribute.has_distribution_strategy()) return "foo_" + s
def get_updates(self, loss, params): if distribute_lib.has_distribution_strategy(): self.updates = [] if not params: # After the model vars have been created, the second call to get_updates # is called with params as an empty list. This ensures that we call # compute_gradients with params=None. grads = self.optimizer.compute_gradients(loss) else: grads = self.optimizer.compute_gradients(loss, params) global_step = training_util.get_global_step() opt_update = self.optimizer.apply_gradients(grads, global_step) else: if not params: self.updates = [state_ops.assign_add(self.iterations, 1)] return self.updates # Updates list starts out empty because the iterations variable is # incremented in optimizer.apply_gradients() self.updates = [] grads = self.optimizer.compute_gradients(loss, params) opt_update = self.optimizer.apply_gradients( grads, global_step=self.iterations) self.updates.append(opt_update) return self.updates
def run_fn(): tower_context = distribute.get_tower_context() self.assertTrue(tower_context is not None) self.assertIs(None, distribute.get_cross_tower_context()) self.assertTrue(distribute.has_distribution_strategy()) self.assertIs(dist, distribute.get_distribution_strategy()) self.assertEqual("foo", tower_context.merge_call(None, test_arg="foo")) self.assertEqual("bar", variable_scope.variable(1.0, name="bar"))
def testScope(self): _assert_in_default_state(self) dist = _TestStrategy() with dist.scope(): self.assertIs(None, distribute.get_tower_context()) self.assertIs(dist, distribute.get_cross_tower_context()) self.assertTrue(distribute.has_distribution_strategy()) self.assertIs(dist, distribute.get_distribution_strategy()) self.assertEqual("baz", variable_scope.variable(1.0, name="baz")) _assert_in_default_state(self)
def run_fn(): tower_context = distribute.get_tower_context() self.assertTrue(tower_context is not None) self.assertIs(None, distribute.get_cross_tower_context()) self.assertTrue(distribute.has_distribution_strategy()) self.assertIs(dist, distribute.get_distribution_strategy()) self.assertEqual("foo", tower_context.merge_call(None, test_arg="foo")) expected_value = _get_test_variable( "bar", variable_scope.VariableSynchronization.AUTO, variable_scope.VariableAggregation.NONE) self.assertDictEqual(expected_value, variable_scope.variable(1.0, name="bar"))
def testScope(self): _assert_in_default_state(self) dist = _TestStrategy() with dist.scope(): self.assertIs(None, distribute.get_tower_context()) self.assertIs(dist, distribute.get_cross_tower_context()) self.assertTrue(distribute.has_distribution_strategy()) self.assertIs(dist, distribute.get_distribution_strategy()) expected_value = _get_test_variable( "baz", variable_scope.VariableSynchronization.AUTO, variable_scope.VariableAggregation.NONE) self.assertDictEqual(expected_value, variable_scope.variable(1.0, name="baz")) _assert_in_default_state(self)
def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Apply gradients to variables. This is the second part of `minimize()`. It returns an `Operation` that applies gradients. Args: grads_and_vars: List of (gradient, variable) pairs as returned by `compute_gradients()`. global_step: Optional `Variable` to increment by one after the variables have been updated. name: Optional name for the returned operation. Default to the name passed to the `Optimizer` constructor. Returns: An `Operation` that applies the specified gradients. If `global_step` was not None, that operation also increments `global_step`. Raises: TypeError: If `grads_and_vars` is malformed. ValueError: If none of the variables have gradients. RuntimeError: If you should use `_distributed_apply()` instead. """ # This is a default implementation of apply_gradients() that can be shared # by most optimizers. It relies on the subclass implementing the following # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse(). # Handle DistributionStrategy case. if distribute_lib.get_cross_tower_context(): raise RuntimeError("Use `_distributed_apply()` instead of " "`apply_gradients()` in a cross-tower context.") # TODO(isaprykin): Get rid of `has_distribution_strategy()` check by # always calling _distributed_apply(), using the default distribution # as needed. if distribute_lib.has_distribution_strategy(): grads_and_vars = get_filtered_grad_fn(lambda _: grads_and_vars)() return distribute_lib.get_tower_context().merge_call( self._distributed_apply, grads_and_vars, global_step, name) # No DistributionStrategy case. grads_and_vars = tuple(grads_and_vars) # Make sure repeat iteration works. if not grads_and_vars: raise ValueError("No variables provided.") converted_grads_and_vars = [] for g, v in grads_and_vars: if g is not None: try: # Convert the grad to Tensor or IndexedSlices if necessary. g = ops.convert_to_tensor_or_indexed_slices(g) except TypeError: raise TypeError( "Gradient must be convertible to a Tensor" " or IndexedSlices, or None: %s" % g) if not isinstance(g, (ops.Tensor, ops.IndexedSlices)): raise TypeError( "Gradient must be a Tensor, IndexedSlices, or None: %s" % g) p = _get_processor(v) converted_grads_and_vars.append((g, v, p)) converted_grads_and_vars = tuple(converted_grads_and_vars) var_list = [v for g, v, _ in converted_grads_and_vars if g is not None] if not var_list: raise ValueError("No gradients provided for any variable: %s." % ([str(v) for _, _, v in converted_grads_and_vars],)) with ops.init_scope(): self._create_slots(var_list) update_ops = [] with ops.name_scope(name, self._name) as name: self._prepare() for grad, var, processor in converted_grads_and_vars: if grad is None: continue # We colocate all ops created in _apply_dense or _apply_sparse # on the same device as the variable. # TODO(apassos): figure out how to get the variable name here. if context.executing_eagerly() or isinstance( var, resource_variable_ops.ResourceVariable) and not var._in_graph_mode: # pylint: disable=protected-access scope_name = "" else: scope_name = var.op.name with ops.name_scope("update_" + scope_name), ops.colocate_with(var): update_ops.append(processor.update_op(self, grad)) if global_step is None: apply_updates = self._finish(update_ops, name) else: with ops.control_dependencies([self._finish(update_ops, "update")]): with ops.colocate_with(global_step): if isinstance(global_step, resource_variable_ops.ResourceVariable): # TODO(apassos): the implicit read in assign_add is slow; consider # making it less so. apply_updates = resource_variable_ops.assign_add_variable_op( global_step.handle, ops.convert_to_tensor(1, dtype=global_step.dtype), name=name) else: apply_updates = state_ops.assign_add(global_step, 1, name=name) if not context.executing_eagerly(): if isinstance(apply_updates, ops.Tensor): apply_updates = apply_updates.op train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) if apply_updates not in train_op: train_op.append(apply_updates) return apply_updates
def model_fn(features, labels, mode): """model_fn for keras Estimator.""" model = _clone_and_build_model(mode, keras_model, custom_objects, features, labels) model_output_names = [] # We need to make sure that the output names of the last layer in the model # is the same for each of the cloned models. This is required for mirrored # strategy when we call regroup. if distribute_lib.has_distribution_strategy(): for name in model.output_names: name = re.compile(r'_\d$').sub('', name) model_output_names.append(name) else: model_output_names = model.output_names # Get inputs to EstimatorSpec predictions = dict(zip(model_output_names, model.outputs)) loss = None train_op = None eval_metric_ops = None # Set loss and metric only during train and evaluate. if mode is not model_fn_lib.ModeKeys.PREDICT: if mode is model_fn_lib.ModeKeys.TRAIN: model._make_train_function() # pylint: disable=protected-access else: model._make_test_function() # pylint: disable=protected-access loss = model.total_loss if model.metrics: # TODO(fchollet): support stateful metrics eval_metric_ops = {} # When each metric maps to an output if isinstance(model.metrics, dict): for i, output_name in enumerate(model.metrics.keys()): metric_name = model.metrics[output_name] if callable(metric_name): metric_name = metric_name.__name__ # When some outputs use the same metric if list(model.metrics.values()).count(metric_name) > 1: metric_name += '_' + output_name eval_metric_ops[metric_name] = metrics_module.mean( model.metrics_tensors[i - len(model.metrics)]) else: for i, metric_name in enumerate(model.metrics): if callable(metric_name): metric_name = metric_name.__name__ eval_metric_ops[metric_name] = metrics_module.mean( model.metrics_tensors[i]) # Set train_op only during train. if mode is model_fn_lib.ModeKeys.TRAIN: train_op = model.train_function.updates_op if not model._is_graph_network: # Reset model state to original state, # to avoid `model_fn` being destructive for the initial model argument. _in_place_subclassed_model_state_restoration(keras_model) return model_fn_lib.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs={ _DEFAULT_SERVING_KEY: export_lib.export_output.PredictOutput(predictions) })