def _train_model(self, input_fn, hooks, saving_listeners): worker_hooks = [] with ops.Graph().as_default() as g, g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) global_step_read_tensor = training_util._get_or_create_global_step_read( ) # pylint: disable=protected-access with ops.control_dependencies([global_step_read_tensor]): features, labels = self._get_features_and_labels_from_input_fn( input_fn, model_fn_lib.ModeKeys.TRAIN) estimator_spec = self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, self.config) # Check if the user created a loss summary, and add one if they didn't. # We assume here that the summary is called 'loss'. If it is not, we will # make another one with the name 'loss' to ensure it shows up in the right # graph in TensorBoard. if not any([ x.op.name == 'loss' for x in ops.get_collection(ops.GraphKeys.SUMMARIES) ]): summary.scalar('loss', estimator_spec.loss) ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss) worker_hooks.extend(hooks) worker_hooks.extend([ training.NanTensorHook(estimator_spec.loss), training.LoggingTensorHook( { 'loss': estimator_spec.loss, 'step': global_step_tensor }, every_n_iter=100) ]) worker_hooks.extend(estimator_spec.training_hooks) if not (estimator_spec.scaffold.saver or ops.get_collection(ops.GraphKeys.SAVERS)): ops.add_to_collection( ops.GraphKeys.SAVERS, training.Saver( sharded=True, max_to_keep=self._config.keep_checkpoint_max, keep_checkpoint_every_n_hours=( self._config.keep_checkpoint_every_n_hours), defer_build=True, save_relative_paths=True)) chief_hooks = [] all_hooks = worker_hooks + list( estimator_spec.training_chief_hooks) saver_hooks = [ h for h in all_hooks if isinstance(h, training.CheckpointSaverHook) ] if (self._config.save_checkpoints_secs or self._config.save_checkpoints_steps): if not saver_hooks: chief_hooks = [ training.CheckpointSaverHook( self._model_dir, save_secs=self._config.save_checkpoints_secs, save_steps=self._config.save_checkpoints_steps, scaffold=estimator_spec.scaffold) ] saver_hooks = [chief_hooks[0]] if saving_listeners: if not saver_hooks: raise ValueError( 'There should be a CheckpointSaverHook to use saving_listeners. ' 'Please set one of the RunConfig.save_checkpoints_steps or ' 'RunConfig.save_checkpoints_secs.') else: # It is expected to have one CheckpointSaverHook. If multiple, we pick # up the first one to add listener. saver_hooks[0]._listeners.extend(saving_listeners) # pylint: disable=protected-access with training.MonitoredTrainingSession( master=self._config.master, is_chief=self._config.is_chief, checkpoint_dir=self._model_dir, scaffold=estimator_spec.scaffold, hooks=worker_hooks, chief_only_hooks=( tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)), save_checkpoint_secs=0, # Saving is handled by a hook. save_summaries_steps=self._config.save_summary_steps, config=self._session_config, log_step_count_steps=self._config.log_step_count_steps ) as mon_sess: loss = None while not mon_sess.should_stop(): _, loss = mon_sess.run( [estimator_spec.train_op, estimator_spec.loss]) return loss
def _test_minimize_loss_graph(self, task_type, task_id, num_gpus): d, master_target, config = self._get_test_object( task_type, task_id, num_gpus) with ops.Graph().as_default(), \ self.cached_session(config=config, target=master_target) as sess, \ d.scope(): l = core.Dense(1, use_bias=False, name='gpu_%d' % d.extended._num_gpus_per_worker) def loss_fn(x): y = array_ops.reshape(l(x), []) - constant_op.constant(1.) return y * y # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for # multiple graphs (b/111216820). def grad_fn(x): loss = loss_fn(x) var_list = (variables.trainable_variables() + ops.get_collection( ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) grads = gradients.gradients(loss, var_list) ret = list(zip(grads, var_list)) return ret def update(v, g): return v.assign_sub(0.05 * g, use_locking=True) one = constant_op.constant([[1.]]) def step(): """Perform one optimization step.""" # Run forward & backward to get gradients, variables list. g_v = d.extended.call_for_each_replica(grad_fn, args=[one]) # Update the variables using the gradients and the update() function. before_list = [] after_list = [] for g, v in g_v: fetched = d.extended.read_var(v) before_list.append(fetched) with ops.control_dependencies([fetched]): # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.extended.reduce_to(reduce_util.ReduceOp.SUM, g, destinations=v) with ops.control_dependencies( d.extended.update(v, update, args=(g, ), group=False)): after_list.append(d.extended.read_var(v)) return before_list, after_list before_out, after_out = step() if context.num_gpus() < d.extended._num_gpus_per_worker: return True sess.run(variables.global_variables_initializer()) for i in range(10): b, a = sess.run((before_out, after_out)) if i == 0: before, = b after, = a error_before = abs(before - 1) error_after = abs(after - 1) # Error should go down self.assertLess(error_after, error_before)
def testInvalidShape(self): init = init_ops.identity_initializer() with self.session(graph=ops.Graph(), use_gpu=True): self.assertRaises(ValueError, init, shape=[5, 7, 7]) self.assertRaises(ValueError, init, shape=[5]) self.assertRaises(ValueError, init, shape=[])
def _testScopedImport(self, test_dir, exported_filenames): graph = ops.Graph() # Create all the missing inputs. with graph.as_default(): new_image = constant_op.constant(1.2, dtypes.float32, shape=[100, 28], name="images") with self.assertRaisesRegexp(ValueError, "Graph contains unbound inputs"): meta_graph.import_scoped_meta_graph(os.path.join( test_dir, exported_filenames[0]), graph=graph, import_scope="new_hidden1") with self.assertRaisesRegexp(ValueError, "Graph contains unbound inputs"): meta_graph.import_scoped_meta_graph( os.path.join(test_dir, exported_filenames[0]), graph=graph, input_map={"image:0": new_image}, import_scope="new_hidden1") # Verifies we can import the original "hidden1" into "new_hidden1". var_list = meta_graph.import_scoped_meta_graph( os.path.join(test_dir, exported_filenames[0]), graph=graph, input_map={"$unbound_inputs_images": new_image}, import_scope="new_hidden1") self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) new_var_names = [v.name for _, v in var_list.items()] self.assertEqual(["new_hidden1/biases:0", "new_hidden1/weights:0"], sorted(new_var_names)) # Verifies we can import the original "hidden2" into "new_hidden2". hidden1 = array_ops.identity( graph.as_graph_element("new_hidden1/Relu:0"), name="hidden1/Relu") var_list = meta_graph.import_scoped_meta_graph( os.path.join(test_dir, exported_filenames[1]), graph=graph, input_map={"$unbound_inputs_hidden1/Relu": hidden1}, import_scope="new_hidden2", unbound_inputs_col_name=None) self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) new_var_names = [v.name for _, v in var_list.items()] self.assertEqual(["new_hidden2/biases:0", "new_hidden2/weights:0"], sorted(new_var_names)) # Verifies we can import the original "softmax_linear" into # "new_softmax_linear". hidden2 = array_ops.identity( graph.as_graph_element("new_hidden2/Relu:0"), name="hidden2/Relu") var_list = meta_graph.import_scoped_meta_graph( os.path.join(test_dir, exported_filenames[2]), graph=graph, input_map={"$unbound_inputs_hidden2/Relu": hidden2}, import_scope="new_softmax_linear", unbound_inputs_col_name=None) self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) new_var_names = [v.name for _, v in var_list.items()] self.assertEqual( ["new_softmax_linear/biases:0", "new_softmax_linear/weights:0"], sorted(new_var_names)) # Exports the scoped meta graphs again. new_meta_graph1, var_list = meta_graph.export_scoped_meta_graph( graph=graph, export_scope="new_hidden1") self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) new_meta_graph2, var_list = meta_graph.export_scoped_meta_graph( graph=graph, export_scope="new_hidden2", unbound_inputs_col_name=None) self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) new_meta_graph3, var_list = meta_graph.export_scoped_meta_graph( graph=graph, export_scope="new_softmax_linear", unbound_inputs_col_name=None) self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) return [new_meta_graph1, new_meta_graph2, new_meta_graph3]
def testShapeInferenceUnknownShape(self): with ops.Graph().as_default(): indices = array_ops.placeholder(dtypes.int64) shape = array_ops.placeholder(dtypes.int64) output = sparse_ops.sparse_to_dense(indices, shape, 1, 0) self.assertIsNone(output.get_shape().ndims)
def _test_device_assignment_local(self, d, compute_device='CPU', variable_device='CPU', num_gpus=0): with ops.Graph().as_default(), \ self.cached_session(target=self._default_target, config=self._sess_config) as sess, \ d.scope(): def model_fn(): if 'CPU' in compute_device: replica_compute_device = '/device:CPU:0' else: replica_id = _get_replica_id_integer() replica_compute_device = ('/device:GPU:%d' % replica_id) replica_compute_device = device_util.canonicalize( replica_compute_device) if 'CPU' in variable_device: replica_variable_device = '/device:CPU:0' else: replica_id = _get_replica_id_integer() replica_variable_device = ('/device:GPU:%d' % replica_id) replica_variable_device = device_util.canonicalize( replica_variable_device) a = constant_op.constant(1.0) b = constant_op.constant(2.0) c = a + b self.assertEqual(a.device, replica_compute_device) self.assertEqual(b.device, replica_compute_device) self.assertEqual(c.device, replica_compute_device) # The device scope is ignored for variables but not for normal ops. with ops.device('/device:GPU:2'): x = variable_scope.get_variable( 'x', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) x_add = x.assign_add(c) e = a + c self.assertEqual(device_util.canonicalize(x.device), replica_variable_device) self.assertEqual(x_add.device, x.device) self.assertEqual(e.device, device_util.canonicalize('/device:GPU:2')) # The colocate_vars_with can override the distribution's device. with d.colocate_vars_with(x): y = variable_scope.get_variable( 'y', initializer=20.0, aggregation=variable_scope.VariableAggregation.SUM) # We add an identity here to avoid complaints about summing # non-distributed values. y_add = y.assign_add(array_ops.identity(x_add)) self.assertEqual(device_util.canonicalize(y.device), replica_variable_device) self.assertEqual(y_add.device, y.device) self.assertEqual(y.device, x.device) z = variable_scope.get_variable( 'z', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) self.assertEqual(device_util.canonicalize(z.device), replica_variable_device) with ops.control_dependencies([y_add]): # We add an identity here to avoid complaints about summing # non-distributed values. z_add = z.assign_add(array_ops.identity(y)) with ops.control_dependencies([z_add]): f = z + c self.assertEqual(f.device, replica_compute_device) # The device scope would merge with the default worker device. with ops.device('/CPU:1'): g = e + 1.0 self.assertEqual(g.device, device_util.canonicalize('/device:CPU:1')) # Ths ops.colocate_with will be ignored when defining a variale but not # for a normal tensor. with ops.colocate_with(x): u = variable_scope.get_variable('u', initializer=30.0) h = f + 1.0 self.assertEqual(device_util.canonicalize(u.device), replica_variable_device) self.assertEqual(device_util.canonicalize(x.device), device_util.canonicalize(h.device)) return y_add, z_add, f y, z, f = d.call_for_each_replica(model_fn) self.assertNotEqual(y, None) self.assertNotEqual(z, None) self.assertNotEqual(f, None) if context.num_gpus() >= 1 and num_gpus <= 1: variables.global_variables_initializer().run() y_val, z_val, f_val = sess.run([y, z, f]) self.assertEqual(y_val, 33.0) self.assertEqual(z_val, 43.0) self.assertEqual(f_val, 46.0)
def _test_minimize_loss_graph(self, task_type, task_id, num_gpus, use_core_strategy=False): d, master_target, sess_config = self._get_test_objects( task_type, task_id, num_gpus, use_core_strategy=use_core_strategy) if task_type: # Multi-worker assert hasattr(d.extended, '_cluster_spec') and d.extended._cluster_spec num_workers = len(d.extended._cluster_spec.as_dict().get(WORKER)) if CHIEF in d.extended._cluster_spec.as_dict(): num_workers += 1 else: # local num_workers = 1 with ops.Graph().as_default(), \ self.cached_session(target=master_target, config=sess_config) as sess, \ d.scope(): l = core.Dense(1, use_bias=False) def loss_fn(x): y = array_ops.reshape(l(x), []) - constant_op.constant(1.) return y * y # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for # multiple graphs (b/111216820). def grad_fn(x): loss = loss_fn(x) var_list = (variables.trainable_variables() + ops.get_collection( ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) grads = gradients.gradients(loss, var_list) ret = list(zip(grads, var_list)) return ret def update(v, g): return v.assign_sub(0.05 * g, use_locking=True) one = d.broadcast(constant_op.constant([[1.]])) def step(): """Perform one optimization step.""" # Run forward & backward to get gradients, variables list. g_v = d.call_for_each_replica(grad_fn, args=(one, )) # Update the variables using the gradients and the update() function. before_list = [] after_list = [] for g, v in g_v: fetched = d.extended.read_var(v) before_list.append(fetched) with ops.control_dependencies([fetched]): # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.extended.reduce_to(reduce_util.ReduceOp.SUM, g, destinations=v) with ops.control_dependencies( d.update(v, update, g, grouped=False)): after_list.append(d.extended.read_var(v)) return before_list, after_list before_out, after_out = step() if context.num_gpus() < d.extended._num_gpus_per_worker: return True if (not task_type or multi_worker_util.is_chief( d.extended._cluster_spec, task_type, task_id)): variables.global_variables_initializer().run() # Workers waiting for chief worker's initializing variables. self._init_condition.acquire() self._init_reached += 1 while self._init_reached != num_workers: self._init_condition.wait() self._init_condition.notify_all() self._init_condition.release() for i in range(10): b, a = sess.run((before_out, after_out)) if i == 0: before, = b after, = a error_before = abs(before - 1) error_after = abs(after - 1) # Error should go down self.assertLess(error_after, error_before) return error_after < error_before
def testPartitionedVariableAssignments(self): with ops.Graph().as_default(), self.cached_session(): v0 = variables.Variable(initial_value=[0.0]) v1 = variables.Variable(initial_value=[1.0]) v2 = variables.Variable(initial_value=[20.0]) v3 = variables.Variable(initial_value=[30.0]) v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1])) v2._set_save_slice_info( variables.Variable.SaveSliceInfo(v2.name, [2], [0], [1])) v3._set_save_slice_info( variables.Variable.SaveSliceInfo(v3.name, [2], [1], [1])) partitions = [2] # Pass variable_list as [v1, v0] to ensure they are properly # re-sorted to [v0, v1] based on their slice info offsets. pv_0 = variables.PartitionedVariable(name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v0, v1], partitions=partitions) pv_1 = variables.PartitionedVariable(name="two_vars", shape=[2], dtype=v0.dtype, variable_list=[v2, v3], partitions=partitions) deltas_a = constant_op.constant([1.0, 2.0]) deltas_b = constant_op.constant([3.0, 4.0]) ones = array_ops.ones([2]) plus_delta = pv_0.assign_add(deltas_a) minus_delta = pv_0.assign_sub(deltas_b) assign_ones = pv_0.assign(ones) c_0 = constant_op.constant([2.0]) c_1 = constant_op.constant([3.0]) assign_list = pv_1.assign([c_0, c_1]) assign_part_value = pv_1.assign_add(assign_ones) assign_part_var = pv_1.assign_sub(pv_0) variables.global_variables_initializer().run() self.assertEqual([1.0], plus_delta[0].eval()) self.assertEqual([1.0], self.evaluate(v0)) self.assertEqual([3.0], plus_delta[1].eval()) self.assertEqual([3.0], self.evaluate(v1)) self.assertEqual([-2.0], minus_delta[0].eval()) self.assertEqual([-2.0], self.evaluate(v0)) self.assertEqual([-1.0], minus_delta[1].eval()) self.assertEqual([-1.0], self.evaluate(v1)) self.assertEqual([1.0], assign_ones[0].eval()) self.assertEqual([1.0], self.evaluate(v0)) self.assertEqual([1.0], assign_ones[1].eval()) self.assertEqual([1.0], self.evaluate(v1)) self.assertEqual([2.0], assign_list[0].eval()) self.assertEqual([2.0], self.evaluate(v2)) self.assertEqual([3.0], assign_list[1].eval()) self.assertEqual([3.0], self.evaluate(v3)) self.assertEqual([3.0], assign_part_value[0].eval()) self.assertEqual([3.0], self.evaluate(v2)) self.assertEqual([4.0], assign_part_value[1].eval()) self.assertEqual([4.0], self.evaluate(v3)) self.assertEqual([2.0], assign_part_var[0].eval()) self.assertEqual([2.0], self.evaluate(v2)) self.assertEqual([3.0], assign_part_var[1].eval()) self.assertEqual([3.0], self.evaluate(v3))
def _export_mode(mode, has_saved_vars, builder, model, custom_objects, checkpoint_path, input_signature): """Exports a model, and optionally saves new vars from the clone model. Args: mode: A `tf.estimator.ModeKeys` string. has_saved_vars: A `boolean` indicating whether the SavedModel has already exported variables. builder: A `SavedModelBuilder` object. model: A `tf.keras.Model` object. custom_objects: A dictionary mapping string names to custom classes or functions. checkpoint_path: String path to checkpoint. input_signature: Nested TensorSpec containing the expected inputs. Can be `None`, in which case the signature will be inferred from the model. Raises: ValueError: If the train/eval mode is being exported, but the model does not have an optimizer. """ from tensorflow.python.keras import models as models_lib # pylint: disable=g-import-not-at-top compile_clone = (mode != mode_keys.ModeKeys.PREDICT) if compile_clone and not model.optimizer: raise ValueError( 'Model does not have an optimizer. Cannot export mode %s' % mode) model_graph = ops.get_default_graph() with ops.Graph().as_default() as g, K.learning_phase_scope( mode == mode_keys.ModeKeys.TRAIN): if input_signature is None: input_tensors = None else: input_tensors = nest.map_structure(create_placeholder, input_signature) # Clone the model into blank graph. This will create placeholders for inputs # and targets. clone = models_lib.clone_and_build_model(model, input_tensors=input_tensors, custom_objects=custom_objects, compile_clone=compile_clone) # Make sure that iterations variable is added to the global step collection, # to ensure that, when the SavedModel graph is loaded, the iterations # variable is returned by `tf.compat.v1.train.get_global_step()`. This is # required for compatibility with the SavedModelEstimator. if compile_clone: g.add_to_collection(ops.GraphKeys.GLOBAL_STEP, clone.optimizer.iterations) # Extract update and train ops from train/test/predict functions. train_op = None if mode == mode_keys.ModeKeys.TRAIN: clone._make_train_function() train_op = clone.train_function.updates_op elif mode == mode_keys.ModeKeys.TEST: clone._make_test_function() else: clone._make_predict_function() g.get_collection_ref(ops.GraphKeys.UPDATE_OPS).extend( clone.state_updates) with session.Session().as_default(): clone_var_list = _get_var_list(clone) if has_saved_vars: # Confirm all variables in the clone have an entry in the checkpoint. status = clone.load_weights(checkpoint_path) status.assert_existing_objects_matched() else: # Confirm that variables between the clone and model match up exactly, # not counting optimizer objects. Optimizer objects are ignored because # if the model has not trained, the slot variables will not have been # created yet. # TODO(b/113179535): Replace with trackable equivalence. _assert_same_non_optimizer_objects(model, model_graph, clone, g) # TODO(b/113178242): Use value transfer for trackable objects. clone.load_weights(checkpoint_path) # Add graph and variables to SavedModel. # TODO(b/113134168): Switch to add_meta_graph_and_variables. clone.save_weights(checkpoint_path, save_format='tf', overwrite=True) builder._has_saved_variables = True # Add graph to the SavedModel builder. builder.add_meta_graph( model_utils.EXPORT_TAG_MAP[mode], signature_def_map=_create_signature_def_map(clone, mode), saver=saver_lib.Saver( clone_var_list, # Allow saving Models with no variables. This is somewhat odd, but # it's not necessarily a bug. allow_empty=True), init_op=variables.local_variables_initializer(), train_op=train_op) return None
def testZeroSizeVarInitialized(self): with ops.Graph().as_default(), self.cached_session() as sess: v = variables.Variable(array_ops.zeros([0, 2]), name="v") uninited = variables.report_uninitialized_variables() v.initializer.run() # not strictly necessary self.assertEqual(0, self.evaluate(uninited).size)
def testNoVars(self): with ops.Graph().as_default(): self.assertEqual(None, variables.assert_variables_initialized())
def testNoVars(self): with ops.Graph().as_default(), self.cached_session() as sess: uninited = variables.report_uninitialized_variables() self.assertEqual(0, self.evaluate(uninited).size)
def _query_tpu_system_metadata(master_address, cluster_def=None, query_topology=False): """Automatically detects the TPU system metadata in the system.""" tpu_core_count = 0 devices = [] device_dict = collections.defaultdict(list) # TODO(b/120564445): Replace with standard library for retries. retry_count = 1 while True: logging.info('Querying Tensorflow master (%s) for TPU system metadata.', master_address) try: with ops.Graph().as_default(): with session_lib.Session( master_address, config=get_session_config_with_timeout( _PINGING_MASTER_TIMEOUT_IN_MS, cluster_def)) as sess: devices = sess.list_devices() for device in devices: match = _TPU_DEVICE_REG.match(device.name) if match: host_id = match.group(1) core_id = match.group(2) device_dict[host_id].append(core_id) tpu_core_count += 1 break except errors.DeadlineExceededError: msg = ('Failed to connect to the Tensorflow master. The TPU worker may ' 'not be ready (still scheduling) or the Tensorflow master address ' 'is incorrect: got (%s).' % (master_address)) # TODO(xiejw): For local or grpc master we might not need retry logic # here. if retry_count <= _RETRY_TIMES: logging.warning('%s', msg) logging.warning('Retrying (%d/%d).', retry_count, _RETRY_TIMES) retry_count += 1 else: raise ValueError(msg) num_of_cores_per_host = 0 if tpu_core_count: num_cores_per_host_set = set( [len(core_ids) for core_ids in device_dict.values()]) if len(num_cores_per_host_set) != 1: raise RuntimeError( 'TPU cores on each host is not same. This should not happen!. ' 'devices: {}'.format(devices)) num_of_cores_per_host = num_cores_per_host_set.pop() topology = None if query_topology: if not tpu_core_count: raise RuntimeError( 'Cannot find any TPU cores in the system (master address {}). ' 'This usually means the master address is incorrect or the ' 'TPU worker has some problems. Available devices: {}'.format( master_address, devices)) topology = _obtain_topology(master_address, cluster_def) metadata = _TPUSystemMetadata( num_cores=tpu_core_count, num_hosts=len(device_dict), num_of_cores_per_host=num_of_cores_per_host, topology=topology, devices=devices) if tpu_core_count: logging.info('Found TPU system:') logging.info('*** Num TPU Cores: %d', metadata.num_cores) logging.info('*** Num TPU Workers: %d', metadata.num_hosts) logging.info('*** Num TPU Cores Per Worker: %d', metadata.num_of_cores_per_host) for device in metadata.devices: logging.info('*** Available Device: %s', device) else: logging.info('Failed to find TPU: %s', metadata) return metadata
def _evaluate_model(self, input_fn, hooks=None, checkpoint_path=None, name=''): """Evaluates the model using the training.evaluation library.""" # Check that model has been trained (if nothing has been set explicitly). if not checkpoint_path: latest_path = saver.latest_checkpoint(self._model_dir) if not latest_path: raise ValueError( 'Could not find trained model in model_dir: {}.'.format( self._model_dir)) checkpoint_path = latest_path # Setup output directory. eval_dir = os.path.join(self._model_dir, 'eval' if not name else 'eval_' + name) with ops.Graph().as_default() as g: random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) features, labels = self._get_features_and_labels_from_input_fn( input_fn, model_fn_lib.ModeKeys.EVAL) estimator_spec = self._call_model_fn(features, labels, model_fn_lib.ModeKeys.EVAL, self.config) if model_fn_lib.LOSS_METRIC_KEY in estimator_spec.eval_metric_ops: raise ValueError( 'Metric with name "%s" is not allowed, because Estimator ' % (model_fn_lib.LOSS_METRIC_KEY) + 'already defines a default metric with the same name.') estimator_spec.eval_metric_ops[ model_fn_lib.LOSS_METRIC_KEY] = metrics_lib.mean( estimator_spec.loss) update_op, eval_dict = _extract_metric_update_ops( estimator_spec.eval_metric_ops) if ops.GraphKeys.GLOBAL_STEP in eval_dict: raise ValueError( 'Metric with name `global_step` is not allowed, because Estimator ' 'already defines a default metric with the same name.') eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor all_hooks = list(hooks or []) all_hooks.extend(list(estimator_spec.evaluation_hooks or [])) eval_results = evaluation._evaluate_once( # pylint: disable=protected-access checkpoint_path=checkpoint_path, master=self._config.evaluation_master, scaffold=estimator_spec.scaffold, eval_ops=update_op, final_ops=eval_dict, hooks=all_hooks, config=self._session_config) _write_dict_to_summary( output_dir=eval_dir, dictionary=eval_results, current_global_step=eval_results[ops.GraphKeys.GLOBAL_STEP]) return eval_results
def define_function(func, input_types): """Creates a `FunctionDef` for a python function. `func` is a Python function that receives zero or more tensors and returns at least one tensor. It should add ops to the default graph the usual way by calling TensorFlow functions such as `tf.constant()`, `tf.matmul()`, etc. `input_types` is a dictionary of strings to `tf.Dtype` objects. Keys are names arguments to `func`. The value indicate the type of tensor expected by the function. The returned `FunctionDef` protocol buffer is also added to the default graph library. After it has been added you can add calls to the function by passing it to `tf.call_function()`, together with a list of tensors to use as inputs for the function. Notes: * `func` is called once, with `placeholder` tensors of the types specified in `input_types` as arguments. * Values returned by `func` must be tensors and they are recorded as being the output of the function def. * While `func` is a called, an empty graph is temporarily pushed as the default graph. All ops added by `func` to that graph are part of the body of the returned function def. Example, but also see the [How To on functions](link_needed). ```python # A function that receives two tensors x, y and returns their # sum and difference. def my_func(x, y): return x + y, x - y # Create a FunctionDef for 'my_func'. (This does not change the default graph.) my_func_def = tf.define_function(my_func, {'x': tf.float32, 'y': tf.float32}) # Build the graph, calling the function. a = tf.constant([1.0]) b = tf.constant([2.0]) c, d = tf.call_function(my_func_def, a, b, name='mycall') ``` Args: func: a Python function. input_types: dict. Keys are the names of the arguments of `func`, values are their expected `tf.DType`. Returns: A FunctionDef protocol buffer. Raises: ValueError: if the arguments are invalid. """ # TODO(touts): Lift the limitation that func can only receive Tensor args. if inspect.isfunction(func): func_name = func.__name__ elif inspect.ismethod(func): func_name = func.__self__.__name__ + "." + func.__name__ else: raise ValueError("Argument must be a function") argspec = inspect.getargspec(func) if argspec.varargs or argspec.keywords or argspec.defaults: raise ValueError("Only functions with plain arglists are supported.") if inspect.isfunction(func): if len(argspec.args) != len(input_types): raise ValueError( "The function must have the same number of arguments " "as the number of specified input types.") args = argspec.args elif inspect.ismethod(func): if len(argspec.args) != 1 + len(input_types): raise ValueError( "The class function must have the same number of arguments " "as the number of specified input types.") args = argspec.args[1:] # 1st argument is the "class" type. # Create the func_def object. temp_graph = ops.Graph() with temp_graph.as_default(): # List of placeholders for the function_def. inputs = [] # Arglist to call 'func' kwargs = {} for argname in args: if argname not in input_types: raise ValueError("Missing type for argument: " + argname) argholder = array_ops.placeholder(input_types[argname], name=argname) inputs.append(argholder) kwargs[argname] = argholder # Call func and gather the output tensors. outputs = func(**kwargs) if not outputs: raise ValueError("Function must return at least one tensor") # Convenience: if func only returned one value, make it a tuple. if not isinstance(outputs, (list, tuple)): outputs = (outputs, ) # Build the FunctionDef func_def = graph_to_function_def(temp_graph, func_name, inputs, outputs) g = ops.get_default_graph() g._add_function(func_def) # pylint: disable=protected-access return func_def
def _testGradient(self, inputs, weights, expected_jacobians_wrt_input, lattice_sizes, is_hypercube): """Compute the grad_wrt_input and compare it with expected_grad_wrt_input. Args: inputs: a 2D array (or numpy array) contains the test inputs. Its shape should be num_examples x input_size. weights: a 2D array (or numpy array) contains the test weights. Its shape should be num_examples x weight_size. expected_jacobians_wrt_input: 3D array (or numpy) contains a transpoed jacobian matrix that contains dweight/dinput with shape (num_examples, weight_size, input_size). In other words, expected_jacobians_wrt_input[num][ii][jj] == dweight[num][jj]/dinput[num][ii], where num means the current example. lattice_sizes: A list of lattice_sizes. is_hypercube: If true, hypercube gradient is tested, otherwise simplex gradient is tested. Returns: None Raises: Fails if computed jacobian_wrt_inputs != expected_jacobian_wrt_inpu. """ # Number of test examples in inputs. num_examples = len(inputs) weight_size = len(weights[0]) # Define the grad_wrt_input_tensor. with ops.Graph().as_default(): input_tensor = constant_op.constant(inputs, dtype=dtypes.float32) weight_tensor = constant_op.constant(weights, dtype=dtypes.float32) grad_wrt_weight_tensor = array_ops.placeholder( dtype=dtypes.float32, shape=(num_examples, weight_size)) if is_hypercube: grad_wrt_input_tensor = lattice_ops.hypercube_gradient( input_tensor, weight_tensor, grad_wrt_weight_tensor, lattice_sizes) else: grad_wrt_input_tensor = lattice_ops.simplex_gradient( input_tensor, weight_tensor, grad_wrt_weight_tensor, lattice_sizes) # Compute the Jacobian. with self.test_session(use_gpu=False): tf_logging.info("input = %s " % inputs) tf_logging.info("weight = %s " % weights) # num_examples x weight_size x input_size tensor. jacobians_wrt_input = [] # Compute dweight[cnt] / dinput. for cnt in range(weight_size): grad_wrt_weight = [0.] * weight_size grad_wrt_weight[cnt] = 1.0 grad_wrt_weights = [ grad_wrt_weight for _ in range(num_examples) ] tf_logging.info("grad_wrt_weights = %s " % grad_wrt_weights) # num_examples x input_size matrix. grad_weight_wrt_inputs = grad_wrt_input_tensor.eval( feed_dict={grad_wrt_weight_tensor: grad_wrt_weights}) tf_logging.info("grad_wrt_inputs = %s " % grad_weight_wrt_inputs) jacobians_wrt_input.append(grad_weight_wrt_inputs) tf_logging.info("jacobian_wrt_inputs = %s " % jacobians_wrt_input) tf_logging.info("expected_jacobian_wrt_inputs = %s" % expected_jacobians_wrt_input) self.assertAllClose(jacobians_wrt_input, expected_jacobians_wrt_input)
def _test_device_assignment_distributed(self, task_type, task_id, num_gpus, use_core_strategy=False): worker_device = '/job:%s/replica:0/task:%d' % (task_type, task_id) d, _, sess_config = self._get_test_objects( task_type, task_id, num_gpus, use_core_strategy=use_core_strategy) with ops.Graph().as_default(), \ self.cached_session(target=self._default_target, config=sess_config) as sess, \ d.scope(): # Define a variable outside the call_for_each_replica scope. n = variable_scope.get_variable('n', initializer=10.0) self.assertEqual(n.device, '/job:ps/task:0') def model_fn(): if num_gpus == 0: last_part_device = 'device:CPU:0' else: replica_id = _get_replica_id_integer() last_part_device = ('device:GPU:%d' % replica_id) a = constant_op.constant(1.0) b = constant_op.constant(2.0) c = a + b self.assertEqual(a.device, worker_device + '/' + last_part_device) self.assertEqual(b.device, worker_device + '/' + last_part_device) self.assertEqual(c.device, worker_device + '/' + last_part_device) # The device scope is ignored for variables but not for normal ops. with ops.device('/job:worker/task:0'): x = variable_scope.get_variable( 'x', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) x_add = x.assign_add(c) e = a + c # The variable x is on the task 1 since the device_function has been # called once before the model_fn. self.assertEqual(x.device, '/job:ps/task:1') self.assertEqual(x_add.device, x.device) self.assertEqual( e.device, '/job:worker/replica:0/task:0/%s' % last_part_device) # The colocate_vars_with can override the distribution's device. with d.colocate_vars_with(x): y = variable_scope.get_variable( 'y', initializer=20.0, aggregation=variable_scope.VariableAggregation.SUM) # We add an identity here to avoid complaints about summing # non-distributed values. y_add = y.assign_add(array_ops.identity(x_add)) self.assertEqual(y.device, '/job:ps/task:1') self.assertEqual(y_add.device, y.device) self.assertEqual(y.device, x.device) z = variable_scope.get_variable( 'z', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) self.assertEqual(z.device, '/job:ps/task:0') self.assertNotEqual(z.device, x.device) with ops.control_dependencies([y_add]): # We add an identity here to avoid complaints about summing # non-distributed values. z_add = z.assign_add(array_ops.identity(y)) with ops.control_dependencies([z_add]): f = z + c self.assertEqual(f.device, worker_device + '/' + last_part_device) # The device scope would merge with the default worker device. with ops.device('/CPU:1'): g = e + 1.0 self.assertEqual(g.device, worker_device + '/device:CPU:1') # Ths ops.colocate_with will be ignored when defining a variale but not # for a normal tensor. with ops.colocate_with(x): u = variable_scope.get_variable('u', initializer=30.0) v = variable_scope.get_variable('v', initializer=30.0) h = f + 1.0 self.assertIn('/job:ps/', u.device) self.assertIn('/job:ps/', v.device) # u and v are on different parameter servers. self.assertTrue(u.device != x.device or v.device != x.device) self.assertTrue(u.device == x.device or v.device == x.device) # Here h is not on one worker. Note h.device is canonical while x.device # is not but. self.assertIn('/job:ps/', h.device) return y_add, z_add, f y, z, f = d.call_for_each_replica(model_fn) self.assertNotEqual(y, None) self.assertNotEqual(z, None) self.assertNotEqual(f, None) if context.num_gpus() >= 1 and num_gpus <= 1: variables.global_variables_initializer().run() y_val, z_val, f_val = sess.run([y, z, f]) self.assertEqual(y_val, 33.0) self.assertEqual(z_val, 43.0) self.assertEqual(f_val, 46.0)
def testGraphPassedToGraph_isForbiddenForThineOwnSafety(self): with self.assertRaises(TypeError): summary_ops.graph(ops.Graph()) with self.assertRaises(TypeError): summary_ops.graph('')
def _test_simple_increment(self, task_type, task_id, num_gpus, use_core_strategy=False): d, master_target, sess_config = self._get_test_objects( task_type, task_id, num_gpus, use_core_strategy=use_core_strategy) if d.extended._cluster_spec: num_workers = len(d.extended._cluster_spec.as_dict().get(WORKER)) if 'chief' in d.extended._cluster_spec.as_dict(): num_workers += 1 else: num_workers = 1 with ops.Graph().as_default(), \ self.cached_session(target=master_target, config=sess_config) as sess, \ d.scope(): def model_fn(): x = variable_scope.get_variable( 'x', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) y = variable_scope.get_variable( 'y', initializer=20.0, aggregation=variable_scope.VariableAggregation.SUM) z = variable_scope.get_variable( 'z', initializer=30.0, aggregation=variable_scope.VariableAggregation. ONLY_FIRST_REPLICA) # We explicitly make a constant tensor here to avoid complaints about # summing non-distributed values. one = constant_op.constant(1.0) x_add = x.assign_add(one, use_locking=True) y_add = y.assign_add(one, use_locking=True) z_add = z.assign_add(one, use_locking=True) train_op = control_flow_ops.group(x_add, y_add, z_add) return x, y, z, train_op x, y, z, train_op = d.call_for_each_replica(model_fn) train_op = d.group(train_op) if context.num_gpus() < d.extended._num_gpus_per_worker: return True if task_id == 0: variables.global_variables_initializer().run() # Workers waiting for chief worker's initializing variables. self._init_condition.acquire() self._init_reached += 1 while self._init_reached != num_workers: self._init_condition.wait() self._init_condition.notify_all() self._init_condition.release() sess.run(train_op) # Wait for other workers to finish training. self._finish_condition.acquire() self._finish_reached += 1 while self._finish_reached != num_workers: self._finish_condition.wait() self._finish_condition.notify_all() self._finish_condition.release() x_val, y_val, z_val = sess.run([x, y, z]) self.assertEqual(x_val, 10.0 + 1.0 * num_workers * d.num_replicas_in_sync) self.assertEqual(y_val, 20.0 + 1.0 * num_workers * d.num_replicas_in_sync) self.assertEqual(z_val, 30.0 + 1.0 * num_workers) return (x_val == 10.0 + 1.0 * num_workers * d.num_replicas_in_sync and y_val == 20.0 + 1.0 * num_workers * d.num_replicas_in_sync and z_val == 30.0 + 1.0 * num_workers)
def testCond(self): with ops.Graph().as_default(): pred = array_ops.placeholder_with_default(True, shape=()) x = control_flow_ops.cond(pred, lambda: constant_op.constant(1), lambda: constant_op.constant(2)) self.assertIsNone(smart_cond.smart_constant_value(x))
def _testScopedExport(self, test_dir, exported_filenames): graph = ops.Graph() with graph.as_default(): # Creates an inference graph. # Hidden 1 colocate_constraint = constant_op.constant(1.2, name="constraint") images = constant_op.constant(1.2, dtypes.float32, shape=[100, 28], name="images") with ops.name_scope("hidden1"): with graph.colocate_with(colocate_constraint.op): weights1 = variables.Variable(random_ops.truncated_normal( [28, 128], stddev=1.0 / math.sqrt(float(28))), name="weights") # The use of control_flow_ops.cond here is purely for adding test # coverage the save and restore of control flow context (which doesn't # make any sense here from a machine learning perspective). The typical # biases is a simple Variable without the conditions. biases1 = variables.Variable(control_flow_ops.cond( math_ops.less(random.random(), 0.5), lambda: array_ops.ones([128]), lambda: array_ops.zeros([128])), name="biases") hidden1 = nn_ops.relu( math_ops.matmul(images, weights1) + biases1) # Hidden 2 with ops.name_scope("hidden2"): weights2 = variables.Variable(random_ops.truncated_normal( [128, 32], stddev=1.0 / math.sqrt(float(128))), name="weights") # The use of control_flow_ops.while_loop here is purely for adding test # coverage the save and restore of control flow context (which doesn't # make any sense here from a machine learning perspective). The typical # biases is a simple Variable without the conditions. def loop_cond(it, _): return it < 2 def loop_body(it, biases2): biases2 += constant_op.constant(0.1, shape=[32]) return it + 1, biases2 _, biases2 = control_flow_ops.while_loop( loop_cond, loop_body, [ constant_op.constant(0), variables.Variable(array_ops.zeros([32]), name="biases") ]) hidden2 = nn_ops.relu( math_ops.matmul(hidden1, weights2) + biases2) # Linear with ops.name_scope("softmax_linear"): weights3 = variables.Variable(random_ops.truncated_normal( [32, 10], stddev=1.0 / math.sqrt(float(32))), name="weights") biases3 = variables.Variable(array_ops.zeros([10]), name="biases") logits = math_ops.matmul(hidden2, weights3) + biases3 ops.add_to_collection("logits", logits) # Exports each sub-graph. # Exports the first one with unbound_inputs_col_name set to default. orig_meta_graph1, var_list = meta_graph.export_scoped_meta_graph( filename=os.path.join(test_dir, exported_filenames[0]), graph=ops.get_default_graph(), export_scope="hidden1") self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) var_names = [v.name for _, v in var_list.items()] self.assertEqual(["hidden1/biases:0", "hidden1/weights:0"], sorted(var_names)) # Exports the rest with no unbound_inputs_col_name. orig_meta_graph2, _ = meta_graph.export_scoped_meta_graph( filename=os.path.join(test_dir, exported_filenames[1]), graph=ops.get_default_graph(), export_scope="hidden2", unbound_inputs_col_name=None) orig_meta_graph3, _ = meta_graph.export_scoped_meta_graph( filename=os.path.join(test_dir, exported_filenames[2]), graph=ops.get_default_graph(), export_scope="softmax_linear", unbound_inputs_col_name=None) return [orig_meta_graph1, orig_meta_graph2, orig_meta_graph3]
def testMissingArg2(self): with ops.Graph().as_default(): with session.Session(): x = constant_op.constant(1) with self.assertRaises(TypeError): smart_cond.smart_cond(True, lambda: x)
def testStopGradient(self): with ops.Graph().as_default(): inp = constant(1.0, shape=[100, 32], name="in") out = array_ops.stop_gradient(inp) igrad = gradients.gradients(out, inp)[0] assert igrad is None
def _setup_training(self): """Sets up graph, model and trainer.""" # Create config if not given. if self._config is None: self._config = RunConfig(verbose=self.verbose) # Create new graph. self._graph = ops.Graph() self._graph.add_to_collection("IS_TRAINING", True) with self._graph.as_default(): random_seed.set_random_seed(self._config.tf_random_seed) self._global_step = variables.Variable(0, name="global_step", trainable=False) # Setting up inputs and outputs. self._inp, self._out = self._data_feeder.input_builder() # If class weights are provided, add them to the graph. # Different loss functions can use this tensor by name. if self.class_weight: self._class_weight_node = constant_op.constant( self.class_weight, name='class_weight') # Add histograms for X and y if they are floats. if self._data_feeder.input_dtype in (np.float32, np.float64): logging_ops.histogram_summary("X", self._inp) if self._data_feeder.output_dtype in (np.float32, np.float64): logging_ops.histogram_summary("y", self._out) # Create model's graph. self._model_predictions, self._model_loss = self.model_fn( self._inp, self._out) # Create trainer and augment graph with gradients and optimizer. # Additionally creates initialization ops. learning_rate = self.learning_rate optimizer = self.optimizer if callable(learning_rate): learning_rate = learning_rate(self._global_step) if callable(optimizer): optimizer = optimizer(learning_rate) self._train = optimizers.optimize_loss( self._model_loss, self._global_step, learning_rate=learning_rate, optimizer=optimizer, clip_gradients=self.clip_gradients) # Update ops during training, e.g. batch_norm_ops self._train = control_flow_ops.group( self._train, *ops.get_collection('update_ops')) # Merge all summaries into single tensor. self._summaries = logging_ops.merge_all_summaries() # Get all initializers for all trainable variables. self._initializers = variables.initialize_all_variables() # Create model's saver capturing all the nodes created up until now. self._saver = train.Saver( max_to_keep=self._config.keep_checkpoint_max, keep_checkpoint_every_n_hours=self._config. keep_checkpoint_every_n_hours) # Enable monitor to create validation data dict with appropriate tf placeholders self._monitor.create_val_feed_dict(self._inp, self._out) # Create session to run model with. self._session = session.Session(self._config.tf_master, config=self._config.tf_config) # Run parameter initializers. self._session.run(self._initializers)
def testGraphMode(self): graph = ops.Graph() with graph.as_default(), context.graph_mode(): array_ops.placeholder(dtypes.int32) self.assertLen(graph.get_operations(), 1)
def _restore(self, path): """Restores this estimator from given path. Note: will rebuild the graph and initialize all parameters, and will ignore provided model. Args: path: Path to checkpoints and other information. """ # Currently Saver requires absolute path to work correctly. path = os.path.abspath(path) self._graph = ops.Graph() with self._graph.as_default(): endpoints_filename = os.path.join(path, 'endpoints') if not os.path.exists(endpoints_filename): raise ValueError("Restore folder doesn't contain endpoints.") with gfile.Open(endpoints_filename) as foutputs: endpoints = foutputs.read().split('\n') graph_filename = os.path.join(path, 'graph.pbtxt') if not os.path.exists(graph_filename): raise ValueError( "Restore folder doesn't contain graph definition.") with gfile.Open(graph_filename) as fgraph: graph_def = graph_pb2.GraphDef() text_format.Merge(fgraph.read(), graph_def) (self._inp, self._out, self._model_predictions, self._model_loss) = importer.import_graph_def( graph_def, name='', return_elements=endpoints) saver_filename = os.path.join(path, 'saver.pbtxt') if not os.path.exists(saver_filename): raise ValueError( "Restore folder doesn't contain saver definition.") with gfile.Open(saver_filename) as fsaver: saver_def = train.SaverDef() text_format.Merge(fsaver.read(), saver_def) self._saver = train.Saver(saver_def=saver_def) # Restore trainer self._global_step = self._graph.get_tensor_by_name('global_step:0') self._train = self._graph.get_operation_by_name('train') # Restore summaries. self._summaries = self._graph.get_operation_by_name( 'MergeSummary/MergeSummary') # Restore session. if not isinstance(self._config, RunConfig): self._config = RunConfig(verbose=self.verbose) self._session = session.Session(self._config.tf_master, config=self._config.tf_config) checkpoint_path = train.latest_checkpoint(path) if checkpoint_path is None: raise ValueError( "Missing checkpoint files in the %s. Please " "make sure you are you have checkpoint file that describes " "latest checkpoints and appropriate checkpoints are there. " "If you have moved the folder, you at this point need to " "update manually update the paths in the checkpoint file." % path) self._saver.restore(self._session, checkpoint_path) # Set to be initialized. self._initialized = True
def _test_mixed_precision(self, task_type, task_id, num_gpus): """Tests mixed precision works with the CollectiveAllReduceStrategy. This tests: 1. Variables are in float32, by running with a small enough learning rate that if the variables are float16, their values wouldn't change when gradients are applied. 2. The loss scale is doubled if there are no NaNs. 3. The loss scale is halved if the first worker has a NaN, even if the other works do not have NaNs. Args: task_type: A string, such as "worker", indicating the type of the replica. task_id: Zero-indexed ID of the task. num_gpus: The number of GPUs to use. """ d, master_target, config = self._get_test_object( task_type, task_id, num_gpus) # Should be set to mixed_float16 by caller. self.assertEqual(policy.global_policy().name, 'mixed_float16') with ops.Graph().as_default(), \ self.cached_session(config=config, target=master_target) as sess: # The loss on the first worker is multiplied by this value. Allows # testing the first worker having NaN loss and gradients while keeping the # other workers' losses and gradients finite. loss_multiplier_for_first_worker = variables.Variable( 1., dtype='float16', trainable=False) with d.scope(): model = keras.Sequential([ mp_test_util.AddLayer(assert_type=dtypes.float16, input_shape=(1, )), ]) loss_scale = loss_scale_module.DynamicLossScale( 2**10, increment_period=1) def model_fn(): """Simple model to test mixed precision.""" x = np.ones((1, 1)) loss = model(x, training=True) if ((task_type == 'worker' and task_id == 0) or task_type is task_id is None): loss *= loss_multiplier_for_first_worker # Learning rate is small enough that if applied to a float16 variable, # the variable will not change. So this tests the learning rate is not # applied to a float16 value, but instead the float32 variable. optimizer = gradient_descent.GradientDescentOptimizer( 2**-14) optimizer = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer( optimizer, loss_scale) train_op = optimizer.minimize( loss, training_util.get_or_create_global_step()) return train_op train_op = d.extended.call_for_each_replica(model_fn) train_op = d.group(d.experimental_local_results(train_op)) sess.run(variables.global_variables_initializer()) sess.run(train_op) (var, ) = model.trainable_weights # Variable starts at 1. Each worker's gradient is 2 ** -14, the learning # rate, and each worker's gradient will be subtracted from the variable. expected = 1 - d.num_replicas_in_sync * 2**-14 self.assertEqual(sess.run(var), expected) # Loss scale should double, as are gradients are finite. self.assertEqual(sess.run(loss_scale()), 2**11) # Set the first worker to have NaN loss and gradients. sess.run(loss_multiplier_for_first_worker.assign(float('NaN'))) sess.run(train_op) # Variable should not change, since first worker had NaN self.assertEqual(sess.run(var), expected) # Loss scale should halve due to NaN self.assertEqual(sess.run(loss_scale()), 2**10)
def testConstructionNonSharded(self): with ops.Graph().as_default(): p = variables.Variable( array_ops.zeros(shape=[100, 100], dtype=dtypes.float32)) ids = constant_op.constant([0, 1, 1, 7], dtype=dtypes.int32) embedding_ops.embedding_lookup([p], ids)
def testNonSquare(self): init = init_ops.identity_initializer() shape = (10, 5) with self.session(graph=ops.Graph(), use_gpu=True): self.assertAllClose(init(shape).eval(), np.eye(*shape))
def export_savedmodel(self, export_dir_base, serving_input_receiver_fn, assets_extra=None, as_text=False, checkpoint_path=None): """Exports inference graph as a SavedModel into given dir. This method builds a new graph by first calling the serving_input_receiver_fn to obtain feature `Tensor`s, and then calling this `Estimator`'s model_fn to generate the model graph based on those features. It restores the given checkpoint (or, lacking that, the most recent checkpoint) into this graph in a fresh session. Finally it creates a timestamped export directory below the given export_dir_base, and writes a `SavedModel` into it containing a single `MetaGraphDef` saved from this session. The exported `MetaGraphDef` will provide one `SignatureDef` for each element of the export_outputs dict returned from the model_fn, named using the same keys. One of these keys is always signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, indicating which signature will be served when a serving request does not specify one. For each signature, the outputs are provided by the corresponding `ExportOutput`s, and the inputs are always the input receivers provided by the serving_input_receiver_fn. Extra assets may be written into the SavedModel via the extra_assets argument. This should be a dict, where each key gives a destination path (including the filename) relative to the assets.extra directory. The corresponding value gives the full path of the source file to be copied. For example, the simple case of copying a single file without renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. Args: export_dir_base: A string containing a directory in which to create timestamped subdirectories containing exported SavedModels. serving_input_receiver_fn: A function that takes no argument and returns a `ServingInputReceiver`. assets_extra: A dict specifying how to populate the assets.extra directory within the exported SavedModel, or `None` if no extra assets are needed. as_text: whether to write the SavedModel proto in text format. checkpoint_path: The checkpoint path to export. If `None` (the default), the most recent checkpoint found within the model directory is chosen. Returns: The string path to the exported directory. Raises: ValueError: if no serving_input_receiver_fn is provided, no export_outputs are provided, or no checkpoint can be found. """ if serving_input_receiver_fn is None: raise ValueError('serving_input_receiver_fn must be defined.') with ops.Graph().as_default() as g: self._create_and_assert_global_step(g) random_seed.set_random_seed(self._config.tf_random_seed) serving_input_receiver = serving_input_receiver_fn() # Call the model_fn and collect the export_outputs. estimator_spec = self._call_model_fn( features=serving_input_receiver.features, labels=None, mode=model_fn_lib.ModeKeys.PREDICT, config=self.config) # Build the SignatureDefs from receivers and all outputs signature_def_map = build_all_signature_defs( serving_input_receiver.receiver_tensors, estimator_spec.export_outputs, serving_input_receiver.receiver_tensors_alternatives) if not checkpoint_path: # Locate the latest checkpoint checkpoint_path = saver.latest_checkpoint(self._model_dir) if not checkpoint_path: raise ValueError("Couldn't find trained model at %s." % self._model_dir) export_dir = get_timestamped_export_dir(export_dir_base) temp_export_dir = get_temp_export_dir(export_dir) # TODO(soergel): Consider whether MonitoredSession makes sense here with tf_session.Session() as session: saver_for_restore = estimator_spec.scaffold.saver or saver.Saver( sharded=True) saver_for_restore.restore(session, checkpoint_path) # TODO(b/36111876): replace legacy_init_op with main_op mechanism # pylint: disable=protected-access local_init_op = ( estimator_spec.scaffold.local_init_op or monitored_session.Scaffold._default_local_init_op()) # pylint: enable=protected-access # Perform the export builder = saved_model_builder.SavedModelBuilder( temp_export_dir) builder.add_meta_graph_and_variables( session, [tag_constants.SERVING], signature_def_map=signature_def_map, assets_collection=ops.get_collection( ops.GraphKeys.ASSET_FILEPATHS), legacy_init_op=local_init_op) builder.save(as_text) # Add the extra assets if assets_extra: assets_extra_path = os.path.join( compat.as_bytes(temp_export_dir), compat.as_bytes('assets.extra')) for dest_relative, source in assets_extra.items(): dest_absolute = os.path.join( compat.as_bytes(assets_extra_path), compat.as_bytes(dest_relative)) dest_path = os.path.dirname(dest_absolute) gfile.MakeDirs(dest_path) gfile.Copy(source, dest_absolute) gfile.Rename(temp_export_dir, export_dir) return export_dir