def benchmarkMatrixSolveLsOp(self): run_gpu_test = test_lib.is_gpu_available(True) regularizer = 1.0 for matrix_shape in self.matrix_shapes: for num_rhs in 1, 2, matrix_shape[-1]: with ops.Graph().as_default(), \ session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/cpu:0"): matrix, rhs = _GenerateTestData(matrix_shape, num_rhs) x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer) variables.global_variables_initializer().run() self.run_op_benchmark( sess, control_flow_ops.group(x), min_iters=25, store_memory_usage=False, name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}" ).format(matrix_shape=matrix_shape, num_rhs=num_rhs)) if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513): with ops.Graph().as_default(), \ session.Session(config=benchmark.benchmark_config()) as sess, \ ops.device("/gpu:0"): matrix, rhs = _GenerateTestData(matrix_shape, num_rhs) x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer) variables.global_variables_initializer().run() self.run_op_benchmark( sess, control_flow_ops.group(x), min_iters=25, store_memory_usage=False, name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_" "{num_rhs}").format( matrix_shape=matrix_shape, num_rhs=num_rhs))
def testSparseRepeatedIndices(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.test_session(): repeated_index_update_var = variables.Variable( [[1.0], [2.0]], dtype=dtype) aggregated_update_var = variables.Variable( [[1.0], [2.0]], dtype=dtype) grad_repeated_index = ops.IndexedSlices( constant_op.constant( [0.1, 0.1], shape=[2, 1], dtype=dtype), constant_op.constant([1, 1]), constant_op.constant([2, 1])) grad_aggregated = ops.IndexedSlices( constant_op.constant( [0.2], shape=[1, 1], dtype=dtype), constant_op.constant([1]), constant_op.constant([2, 1])) repeated_update = adam.AdamOptimizer().apply_gradients( [(grad_repeated_index, repeated_index_update_var)]) aggregated_update = adam.AdamOptimizer().apply_gradients( [(grad_aggregated, aggregated_update_var)]) variables.global_variables_initializer().run() self.assertAllClose(aggregated_update_var.eval(), repeated_index_update_var.eval()) for _ in range(3): repeated_update.run() aggregated_update.run() self.assertAllClose(aggregated_update_var.eval(), repeated_index_update_var.eval())
def testInitializationOrder(self): with self.cached_session(): rnd = variables.Variable(random_ops.random_uniform([3, 6]), name="rnd") self.assertEqual("rnd:0", rnd.name) self.assertEqual([3, 6], rnd.get_shape()) self.assertEqual([3, 6], rnd.get_shape()) self.assertEqual([3, 6], rnd.shape) dep = variables.Variable(rnd.initialized_value(), name="dep") self.assertEqual("dep:0", dep.name) self.assertEqual([3, 6], dep.get_shape()) self.assertEqual([3, 6], dep.get_shape()) self.assertEqual([3, 6], dep.shape) # Currently have to set the shape manually for Add. added_val = rnd.initialized_value() + dep.initialized_value() + 2.0 added_val.set_shape(rnd.get_shape()) depdep = variables.Variable(added_val, name="depdep") self.assertEqual("depdep:0", depdep.name) self.assertEqual([3, 6], depdep.get_shape()) self.assertEqual([3, 6], depdep.get_shape()) self.assertEqual([3, 6], depdep.shape) variables.global_variables_initializer().run() self.assertAllClose(rnd.eval(), self.evaluate(dep)) self.assertAllClose(rnd.eval() + self.evaluate(dep) + 2.0, self.evaluate(depdep))
def testFractionalExampleLabel(self): # Setup test data with 1 positive, and 1 mostly-negative example. example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0.1), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() with self.assertRaisesOpError( 'Only labels of 0.0 or 1.0 are supported right now.'): lr.minimize().run()
def testCondNested(self): with context.graph_mode(), self.test_session(): v = resource_variable_ops.ResourceVariable(1.0) variables.global_variables_initializer().run() p = array_ops.placeholder(dtype=dtypes.bool) q = array_ops.placeholder(dtype=dtypes.bool) with function.AutomaticControlDependencies() as c: def true_fn(): v.assign(v + 1, name='true') return 1.0 def false_fn(): def inner_true_fn(): v.assign(v * 2, name='false_true') return 2.0 def inner_false_fn(): v.assign(v * 3, name='false_false') return 3.0 control_flow_ops.cond(q, inner_true_fn, inner_false_fn) return 1.0 control_flow_ops.cond(p, true_fn, false_fn) with ops.name_scope('final'): val = v.read_value() val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False, q: False}), 3.0) self.assertAllEqual(val.eval(feed_dict={p: False, q: True}), 6.0) self.assertAllEqual(val.eval(feed_dict={p: True, q: True}), 7.0) self.assertAllEqual(val.eval(feed_dict={p: True, q: False}), 8.0)
def testInitialization(self): with self.cached_session(): var0 = variables.VariableV1(0.0) self.assertEqual("Variable:0", var0.name) self.assertEqual("Variable", var0._shared_name) self.assertEqual([], var0.get_shape()) self.assertEqual([], var0.get_shape()) self.assertEqual([], var0.shape) var1 = variables.VariableV1(1.1) self.assertEqual("Variable_1:0", var1.name) self.assertEqual("Variable_1", var1._shared_name) self.assertEqual([], var1.get_shape()) self.assertEqual([], var1.get_shape()) self.assertEqual([], var1.shape) with self.assertRaisesOpError("Attempting to use uninitialized value"): self.evaluate(var0) with self.assertRaisesOpError("Attempting to use uninitialized value"): self.evaluate(var1) variables.global_variables_initializer().run() self.assertAllClose(0.0, self.evaluate(var0)) self.assertAllClose(1.1, self.evaluate(var1))
def testLoad(self): with self.cached_session(): var = variables.Variable(np.zeros((5, 5), np.float32)) variables.global_variables_initializer().run() var.load(np.ones((5, 5), np.float32)) self.assertAllClose(np.ones((5, 5), np.float32), self.evaluate(var))
def _test_acgan_helper(self, create_gan_model_fn): model = create_gan_model_fn() loss = train.gan_loss(model) loss_ac_gen = train.gan_loss(model, aux_cond_generator_weight=1.0) loss_ac_dis = train.gan_loss(model, aux_cond_discriminator_weight=1.0) self.assertTrue(isinstance(loss, namedtuples.GANLoss)) self.assertTrue(isinstance(loss_ac_gen, namedtuples.GANLoss)) self.assertTrue(isinstance(loss_ac_dis, namedtuples.GANLoss)) # Check values. with self.test_session(use_gpu=True) as sess: variables.global_variables_initializer().run() loss_gen_np, loss_ac_gen_gen_np, loss_ac_dis_gen_np = sess.run( [loss.generator_loss, loss_ac_gen.generator_loss, loss_ac_dis.generator_loss]) loss_dis_np, loss_ac_gen_dis_np, loss_ac_dis_dis_np = sess.run( [loss.discriminator_loss, loss_ac_gen.discriminator_loss, loss_ac_dis.discriminator_loss]) self.assertTrue(loss_gen_np < loss_dis_np) self.assertTrue(np.isscalar(loss_ac_gen_gen_np)) self.assertTrue(np.isscalar(loss_ac_dis_gen_np)) self.assertTrue(np.isscalar(loss_ac_gen_dis_np)) self.assertTrue(np.isscalar(loss_ac_dis_dis_np))
def _countUpToTest(self, dtype): with self.cached_session(): zero = constant_op.constant(0, dtype=dtype) var = variables.Variable(zero) count_up_to = var.count_up_to(3) variables.global_variables_initializer().run() self.assertEqual(0, self.evaluate(var)) self.assertEqual(0, self.evaluate(count_up_to)) self.assertEqual(1, self.evaluate(var)) self.assertEqual(1, self.evaluate(count_up_to)) self.assertEqual(2, self.evaluate(var)) self.assertEqual(2, self.evaluate(count_up_to)) self.assertEqual(3, self.evaluate(var)) with self.assertRaisesOpError("Reached limit of 3"): self.evaluate(count_up_to) self.assertEqual(3, self.evaluate(var)) with self.assertRaisesOpError("Reached limit of 3"): self.evaluate(count_up_to) self.assertEqual(3, self.evaluate(var))
def testSparseBasic(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.test_session(): var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) grads0 = ops.IndexedSlices( constant_op.constant( [0.1], shape=[1, 1], dtype=dtype), constant_op.constant([0]), constant_op.constant([2, 1])) grads1 = ops.IndexedSlices( constant_op.constant( [0.01], shape=[1, 1], dtype=dtype), constant_op.constant([1]), constant_op.constant([2, 1])) ada_opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1) ada_update = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([[1.0], [2.0]], var0.eval()) self.assertAllClose([[3.0], [4.0]], var1.eval()) # Run 3 step of sgd for _ in range(3): ada_update.run() # Validate updated params self.assertAllCloseAccordingToType( np.array([[-1.6026098728179932], [2.0]]), var0.eval()) self.assertAllCloseAccordingToType( np.array([[3.0], [3.715679168701172]]), var1.eval())
def testSharing(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.test_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) ada_opt = adagrad.AdagradOptimizer(3.0) # Apply the optimizer twice. Both applications will use # the same accums. ada_update1 = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) ada_update2 = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) self.assertEqual(["accumulator"], ada_opt.get_slot_names()) slot0 = ada_opt.get_slot(var0, "accumulator") self.assertEquals(slot0.get_shape(), var0.get_shape()) slot1 = ada_opt.get_slot(var1, "accumulator") self.assertEquals(slot1.get_shape(), var1.get_shape()) variables.global_variables_initializer().run() # Fetch params to validate initial values. self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) # Mix the first and the second adagrad for 3 steps. ada_update1.run() ada_update2.run() ada_update1.run() # Validate updated params (the same as with only 1 Adagrad). self.assertAllCloseAccordingToType( np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) self.assertAllCloseAccordingToType( np.array([2.715679168701172, 3.715679168701172]), var1.eval())
def testConvertVariablesToConsts(self): with ops.Graph().as_default(): variable_node = variables.Variable(1.0, name="variable_node") _ = variables.Variable(1.0, name="unused_variable_node") output_node = math_ops_lib.multiply( variable_node, 2.0, name="output_node") with session.Session() as sess: init = variables.initialize_variables([variable_node]) sess.run(init) output = sess.run(output_node) self.assertNear(2.0, output, 0.00001) variable_graph_def = sess.graph.as_graph_def() # First get the constant_graph_def when variable_names_whitelist is set, # note that if variable_names_whitelist is not set an error will be # thrown because unused_variable_node is not initialized. constant_graph_def = graph_util.convert_variables_to_constants( sess, variable_graph_def, ["output_node"], variable_names_whitelist=set(["variable_node"])) # Then initialize the unused variable, and get another # constant_graph_def when variable_names_whitelist is not set. sess.run(variables.global_variables_initializer()) constant_graph_def_without_variable_whitelist = ( graph_util.convert_variables_to_constants(sess, variable_graph_def, ["output_node"])) # The unused variable should be cleared so the two graphs should be # equivalent. self.assertEqual( str(constant_graph_def), str(constant_graph_def_without_variable_whitelist)) # Test variable name black list. This should result in the variable not # being a const. sess.run(variables.global_variables_initializer()) constant_graph_def_with_blacklist = ( graph_util.convert_variables_to_constants( sess, variable_graph_def, ["output_node"], variable_names_blacklist=set(["variable_node"]))) variable_node = None for node in constant_graph_def_with_blacklist.node: if node.name == "variable_node": variable_node = node self.assertIsNotNone(variable_node) self.assertEqual(variable_node.op, "VariableV2") # Now we make sure the variable is now a constant, and that the graph still # produces the expected result. with ops.Graph().as_default(): _ = importer.import_graph_def(constant_graph_def, name="") self.assertEqual(4, len(constant_graph_def.node)) for node in constant_graph_def.node: self.assertNotEqual("Variable", node.op) self.assertNotEqual("VariableV2", node.op) with session.Session() as sess: output_node = sess.graph.get_tensor_by_name("output_node:0") output = sess.run(output_node) self.assertNear(2.0, output, 0.00001)
def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): # Initialize variables for numpy implementation. m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) var0 = variables.Variable(var0_np) var1 = variables.Variable(var1_np) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) opt = adamax.Adamax(constant_op.constant(0.001)) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], var0.eval()) self.assertAllClose([3.0, 4.0], var1.eval()) beta1_power = get_beta_accumulators(opt, dtype) # Run 3 steps of Adamax for t in range(3): self.assertAllCloseAccordingToType(0.9**(t + 1), beta1_power.eval()) update.run() var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params self.assertAllCloseAccordingToType(var0_np, var0.eval()) self.assertAllCloseAccordingToType(var1_np, var1.eval())
def testGRUCell(self): with self.test_session() as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros([1, 2]) m = array_ops.zeros([1, 2]) g, _ = rnn_cell_impl.GRUCell(2)(x, m) sess.run([variables_lib.global_variables_initializer()]) res = sess.run( [g], {x.name: np.array([[1., 1.]]), m.name: np.array([[0.1, 0.1]])}) # Smoke test self.assertAllClose(res[0], [[0.175991, 0.175991]]) with variable_scope.variable_scope( "other", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros( [1, 3]) # Test GRUCell with input_size != num_units. m = array_ops.zeros([1, 2]) g, _ = rnn_cell_impl.GRUCell(2)(x, m) sess.run([variables_lib.global_variables_initializer()]) res = sess.run( [g], {x.name: np.array([[1., 1., 1.]]), m.name: np.array([[0.1, 0.1]])}) # Smoke test self.assertAllClose(res[0], [[0.156736, 0.156736]])
def benchmarkMatrixExponentialOp(self): for shape in self.shapes: with ops.Graph().as_default(), \ session.Session() as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) expm = linalg_impl.matrix_exponential(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, control_flow_ops.group(expm), min_iters=25, name="matrix_exponential_cpu_{shape}".format( shape=shape)) if test.is_gpu_available(True): with ops.Graph().as_default(), \ session.Session() as sess, \ ops.device("/gpu:0"): matrix = self._GenerateMatrix(shape) expm = linalg_impl.matrix_exponential(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, control_flow_ops.group(expm), min_iters=25, name="matrix_exponential_gpu_{shape}".format( shape=shape))
def testAggregationMethod(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop if not context.executing_eagerly(): loss = loss() sgd = gradient_descent.SGD(3.0) self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Run 1 step of sgd through optimizer opt_op = sgd.minimize( loss, var_list=[var0, var1], aggregation_method=gradients_impl.AggregationMethod .EXPERIMENTAL_ACCUMULATE_N) self.evaluate(variables.global_variables_initializer()) self.evaluate(opt_op) # Validate updated params self.assertAllClose([-14., -13.], self.evaluate(var0)) self.assertAllClose([-6., -5.], self.evaluate(var1))
def testAdaptiveLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) def loss(): return 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop sgd = gradient_descent.SGD(1.0) self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Run 1 step of sgd through optimizer opt_op = sgd.minimize(loss, [var0, var1]) self.evaluate(variables.global_variables_initializer()) self.evaluate(opt_op) # Validate updated params # var0 = [1., 2.] - 1.0 * [5, 5] self.assertAllClose([-4., -3.], self.evaluate(var0)) # var1 = [3., 4.] - 1.0 * [3, 3] self.assertAllClose([0., 1.], self.evaluate(var1)) sgd.learning_rate = 0.5 if context.executing_eagerly(): sgd.minimize(loss, [var0, var1]) else: self.evaluate(opt_op) # Validate updated params # var0 = [-4., -3.] - 0.5 * [5, 5] self.assertAllClose([-6.5, -5.5], self.evaluate(var0)) # var1 = [0., 1.] - 0.5 * [3, 3] self.assertAllClose([-1.5, -0.5], self.evaluate(var1))
def testInitializeAllVariables(self): with self.test_session(): v = resource_variable_ops.ResourceVariable(1, dtype=dtypes.float32) with self.assertRaises(errors.NotFoundError): v.value().eval() variables.global_variables_initializer().run() self.assertEqual(1.0, v.value().eval())
def testToFromProto(self): with self.test_session(): v = resource_variable_ops.ResourceVariable(1.0) variables.global_variables_initializer().run() w = resource_variable_ops.ResourceVariable.from_proto(v.to_proto()) self.assertEquals(2, math_ops.add(w, 1).eval())
def benchmarkCholeskyOp(self): for shape in self.shapes: with ops.Graph().as_default(), \ session.Session() as sess, \ ops.device("/cpu:0"): matrix = variables.Variable(self._GenerateMatrix(shape)) l = linalg_ops.cholesky(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, control_flow_ops.group( l,), min_iters=25, name="cholesky_cpu_{shape}".format(shape=shape)) if test.is_gpu_available(True): with ops.Graph().as_default(), \ session.Session() as sess, \ ops.device("/device:GPU:0"): matrix = variables.Variable(self._GenerateMatrix(shape)) l = linalg_ops.cholesky(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, control_flow_ops.group( l,), min_iters=25, name="cholesky_gpu_{shape}".format(shape=shape))
def _create_checkpoint(weights_and_biases, global_step, model_dir): """Create checkpoint file with provided model weights. Args: weights_and_biases: Iterable of tuples of weight and bias values. global_step: Initial global step to save in checkpoint. model_dir: Directory into which checkpoint is saved. """ weights, biases = zip(*weights_and_biases) model_weights = {} # Hidden layer weights. for i in range(0, len(weights) - 1): model_weights[_HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i] model_weights[_HIDDEN_BIASES_NAME_PATTERN % i] = biases[i] # Output layer weights. model_weights[_LOGITS_WEIGHTS_NAME] = weights[-1] model_weights[_LOGITS_BIASES_NAME] = biases[-1] with ops.Graph().as_default(): # Create model variables. for k, v in six.iteritems(model_weights): variables_lib.Variable(v, name=k, dtype=dtypes.float32) # Create non-model variables. global_step_var = training_util.create_global_step() # Initialize vars and save checkpoint. with tf_session.Session() as sess: variables_lib.global_variables_initializer().run() global_step_var.assign(global_step).eval() saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
def testConfig(self): opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9, nesterov=True) config = opt.get_config() opt2 = gradient_descent.SGD.from_config(config) lr = opt.lr lr2 = opt2.lr self.evaluate(variables.global_variables_initializer()) self.assertAllClose(self.evaluate(lr), self.evaluate(lr2)) self.assertAllClose( self.evaluate(opt._get_hyper("momentum")), self.evaluate(opt2._get_hyper("momentum"))) self.assertAllClose( self.evaluate(opt._get_hyper("decay")), self.evaluate(opt2._get_hyper("decay"))) var0 = variables.Variable([[1.0], [2.0]], dtype=dtypes.float32) loss = lambda: 3 * var0 # learning rate variable created when calling minimize. opt.minimize(loss, [var0]) self.evaluate(variables.global_variables_initializer()) config = opt.get_config() opt3 = gradient_descent.SGD.from_config(config) lr3 = opt3.lr self.evaluate(variables.global_variables_initializer()) self.assertAllClose(self.evaluate(lr), self.evaluate(lr3)) self.assertAllClose( self.evaluate(opt._get_hyper("momentum")), self.evaluate(opt3._get_hyper("momentum"))) self.assertAllClose( self.evaluate(opt._get_hyper("decay")), self.evaluate(opt3._get_hyper("decay"))) self.assertTrue(opt3.nesterov)
def testDenseFeaturesWeightedExamples(self): with self._single_threaded_test_session(): examples, variables = make_dense_examples_and_variables_dicts( dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]], weights=[3.0, 1.0], labels=[1.0, 0.0]) options = dict( symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type='hinge_loss') model = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() predictions = model.predictions(examples) binary_predictions = get_binary_predictions_for_hinge(predictions) train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() model.update_weights(train_op).run() # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will # try to increase the margin from (1.0, 0.5). Due to regularization, # (1.0, -0.5) will be within the margin. For these points and example # weights, the optimal weights are w_1~=0.4 and w_2~=1.2 which give an L2 # loss of 0.5 * 0.25 * 0.25 * 1.6 = 0.2. The binary predictions will be # correct, but the boundary will be much closer to the 2nd point than the # first one. self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05) self.assertAllEqual([1, 0], binary_predictions.eval()) unregularized_loss = model.unregularized_loss(examples) regularized_loss = model.regularized_loss(examples) self.assertAllClose(0.2, unregularized_loss.eval(), atol=0.02) self.assertAllClose(0.4, regularized_loss.eval(), atol=0.02)
def testDenseFeaturesPerfectlySeparable(self): with self._single_threaded_test_session(): examples, variables = make_dense_examples_and_variables_dicts( dense_features_values=[[1.0, 1.0], [1.0, -1.0]], weights=[1.0, 1.0], labels=[1.0, 0.0]) options = dict( symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type='hinge_loss') model = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() predictions = model.predictions(examples) binary_predictions = get_binary_predictions_for_hinge(predictions) train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() model.update_weights(train_op).run() self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05) self.assertAllEqual([1, 0], binary_predictions.eval()) # (1.0, 1.0) and (1.0, -1.0) are perfectly separable by x-axis (that is, # the SVM's functional margin >=1), so the unregularized loss is ~0.0. # There is only loss due to l2-regularization. For these datapoints, it # turns out that w_1~=0.0 and w_2~=1.0 which means that l2 loss is ~0.25. unregularized_loss = model.unregularized_loss(examples) regularized_loss = model.regularized_loss(examples) self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02) self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)
def testDenseFeaturesSeparableWithinMargins(self): with self._single_threaded_test_session(): examples, variables = make_dense_examples_and_variables_dicts( dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]], weights=[1.0, 1.0], labels=[1.0, 0.0]) options = dict( symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type='hinge_loss') model = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() predictions = model.predictions(examples) binary_predictions = get_binary_predictions_for_hinge(predictions) train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() model.update_weights(train_op).run() # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints # are within the margins so there is unregularized loss (1/2 per example). # For these datapoints, optimal weights are w_1~=0.0 and w_2~=1.0 which # gives an L2 loss of ~0.25. self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05) self.assertAllEqual([1, 0], binary_predictions.eval()) unregularized_loss = model.unregularized_loss(examples) regularized_loss = model.regularized_loss(examples) self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02) self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
def testDenseFeaturesWithDefaultWeights(self): with self._single_threaded_test_session(): examples, variables = make_dense_examples_and_variables_dicts( dense_features_values=[[[1.0], [0.0]], [0.0, 1.0]], weights=[1.0, 1.0], labels=[10.0, -5.0]) options = dict( symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type='squared_loss') lr = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() predictions = lr.predictions(examples) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() # The loss function for these particular features is given by: # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). So, # differentiating wrt to w_1, w_2 yields the following optimal values: # w_1* = label_1/(\lambda + 1)= 10/2, w_2* =label_2/(\lambda + 1)= -5/2. # In this case the (unnormalized regularized) loss will be: # 1/2(10-5)^2 + 1/2(5-5/2)^2 + 1/2(5^2 + (5/2)^2) = 125.0/4. The actual # loss should be further normalized by the sum of example weights. self.assertAllClose([5.0, -2.5], predictions.eval(), rtol=0.01) loss = lr.regularized_loss(examples) self.assertAllClose(125.0 / 8.0, loss.eval(), atol=0.01)
def testDenseFeaturesWithArbitraryWeights(self): with self._single_threaded_test_session(): examples, variables = make_dense_examples_and_variables_dicts( dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]], weights=[20.0, 10.0], labels=[10.0, -5.0]) options = dict( symmetric_l2_regularization=5.0, symmetric_l1_regularization=0, loss_type='squared_loss') lr = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() predictions = lr.predictions(examples) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() # The loss function for these particular features is given by: # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 + # \lambda/2 (w_1^2 + w_2^2) where s_1, s_2 are the *example weights. It # turns out that the optimal (variable) weights are given by: # w_1* = label_1 \cdot s_1/(\lambda + s_1)= 8.0 and # w_2* =label_2 \cdot s_2/(\lambda + s_2)= -10/3. # In this case the (unnormalized regularized) loss will be: # s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9. The # actual loss should be further normalized by the sum of example weights. self.assertAllClose([8.0, -10.0 / 3], predictions.eval(), rtol=0.01) loss = lr.regularized_loss(examples) self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)
def testMetaGraphSaveLoad(self): save_prefix = os.path.join(self.get_temp_dir(), "ckpt") save_graph = ops.Graph() with save_graph.as_default(), self.test_session( graph=save_graph) as session: partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0) with variable_scope.variable_scope("root", partitioner=partitioner): v0 = variable_scope.get_variable( "v0", dtype=dtypes.float32, shape=(10, 10)) v0_list = v0._get_variable_list() v0_part = v0._get_partitions() self.assertEqual(len(v0_list), 5) self.assertAllEqual(v0_part, (5, 1)) variables.global_variables_initializer().run() save_graph.get_collection_ref("partvar").append(v0) saver = saver_lib.Saver() save_graph.finalize() save_path = saver.save(sess=session, save_path=save_prefix) previous_value = session.run( save_graph.get_tensor_by_name(v0.name + ":0")) restore_graph = ops.Graph() with restore_graph.as_default(), self.test_session( graph=restore_graph) as session: saver = saver_lib.import_meta_graph(save_path + ".meta") saver.restore(sess=session, save_path=save_path) v0, = save_graph.get_collection_ref("partvar") self.assertIsInstance(v0, variables.PartitionedVariable) self.assertAllEqual( previous_value, session.run(restore_graph.get_tensor_by_name(v0.name + ":0")))
def testOutOfRangeSparseFeatures(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(0, 0) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type='logistic_loss') lr = SdcaModel(examples, variables, options) variables_lib.global_variables_initializer().run() train_op = lr.minimize() with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, 'indices.*'): train_op.run()
def testVariableInput(self): with self.test_session(): v = variable_scope.get_variable( 'X', initializer=init_ops.zeros_initializer(), shape=(1, 1)) x = core_layers.Dense(1)(v) variables.global_variables_initializer().run() self.assertAllEqual(x.eval(), [[0.0]])
def testPrepareFeaturesForSQSS(self): mode = model_fn_lib.ModeKeys.TRAIN seq_feature_name = 'seq_feature' sparse_seq_feature_name = 'wire_cast' ctx_feature_name = 'ctx_feature' sequence_length = 4 embedding_dimension = 8 features = { sparse_seq_feature_name: sparse_tensor.SparseTensor(indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1], [2, 0, 0], [2, 1, 1]], values=[ b'marlo', b'stringer', b'omar', b'stringer', b'marlo', b'marlo', b'omar' ], dense_shape=[3, 2, 2]), seq_feature_name: constant_op.constant(1.0, shape=[sequence_length]), ctx_feature_name: constant_op.constant(2.0) } labels = constant_op.constant(5.0, shape=[sequence_length]) wire_cast = feature_column.sparse_column_with_keys( 'wire_cast', ['marlo', 'omar', 'stringer']) sequence_feature_columns = [ feature_column.real_valued_column(seq_feature_name, dimension=1), feature_column.embedding_column( wire_cast, dimension=embedding_dimension, initializer=init_ops.ones_initializer()) ] context_feature_columns = [ feature_column.real_valued_column(ctx_feature_name, dimension=1) ] expected_sequence = { rnn_common.RNNKeys.LABELS_KEY: np.array([5., 5., 5., 5.]), seq_feature_name: np.array([1., 1., 1., 1.]), sparse_seq_feature_name: sparse_tensor.SparseTensor(indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1], [2, 0, 0], [2, 1, 1]], values=[ b'marlo', b'stringer', b'omar', b'stringer', b'marlo', b'marlo', b'omar' ], dense_shape=[3, 2, 2]), } expected_context = {ctx_feature_name: 2.} sequence, context = ssre._prepare_features_for_sqss( features, labels, mode, sequence_feature_columns, context_feature_columns) def assert_equal(expected, got): self.assertEqual(sorted(expected), sorted(got)) for k, v in expected.items(): if isinstance(v, sparse_tensor.SparseTensor): self.assertAllEqual(v.values.eval(), got[k].values) self.assertAllEqual(v.indices.eval(), got[k].indices) self.assertAllEqual(v.dense_shape.eval(), got[k].dense_shape) else: self.assertAllEqual(v, got[k]) with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) actual_sequence, actual_context = sess.run([sequence, context]) assert_equal(expected_sequence, actual_sequence) assert_equal(expected_context, actual_context)
def _testSaveRestoreOutput(self, rnn_mode, direction, dtype): with ops.Graph().as_default(): num_layers = 2 num_units = 7 input_size = 7 seq_length = 10 batch_size = 5 dir_count = 1 if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION else 2 model = _CreateModel(rnn_mode, num_layers, num_units, input_size, direction=direction, dtype=dtype) params_size_t = model.params_size() params = variables.VariableV1(array_ops.ones([params_size_t], dtype=dtype), validate_shape=False, dtype=dtype) _CreateParamsSavable(params, model) save_path = os.path.join(self.get_temp_dir(), "save-restore-output-test") saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) np.random.seed(1234) has_input_c = (rnn_mode == cudnn_rnn_ops.CUDNN_LSTM) input_data = constant_op.constant(np.random.randn( seq_length, batch_size, input_size), dtype=dtype) input_h = constant_op.constant(np.random.randn( num_layers * dir_count, batch_size, num_units), dtype=dtype) if has_input_c: input_c = constant_op.constant(np.random.randn( num_layers * dir_count, batch_size, num_units), dtype=dtype) outputs = model(input_data=input_data, input_h=input_h, input_c=input_c, params=params, is_training=False) else: outputs = model(input_data=input_data, input_h=input_h, params=params, is_training=False) total_sum = sum(map(math_ops.reduce_sum, outputs)) # Passing graph explicitly, otherwise an old sess would be reused. with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: sess.run(variables.global_variables_initializer()) total_sum_v = sess.run(total_sum) val = saver.save(sess, save_path) self.assertEqual(save_path, val) # Passing graph explicitly, otherwise an old sess would be reused. with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: reset_params = state_ops.assign(params, array_ops.zeros( [params_size_t], dtype=dtype), validate_shape=False) sess.run(reset_params) saver.restore(sess, save_path) total_sum_v_restored = sess.run(total_sum) self.assertAllClose(total_sum_v, total_sum_v_restored, atol=1e-5)
def freeze_and_run_tf(self, func, feed_dict, outputs, as_session, premade_placeholders, large_model, constant_fold): np.random.seed(1) # Make it reproducible. clean_feed_dict = {utils.node_name(k): v for k, v in feed_dict.items()} if is_tf2() and not as_session: # # use eager to execute the tensorflow func # # numpy doesn't work for all ops, make it tf.Tensor() input_tensors = [ tf.TensorSpec(shape=v.shape, dtype=tf.as_dtype(v.dtype), name=utils.node_name(k)) for k, v in feed_dict.items() ] input_list = [ tf.convert_to_tensor(v, dtype=tf.as_dtype(v.dtype), name=utils.node_name(k)) for k, v in feed_dict.items() ] tf.random.set_seed(1) result = func(*input_list) if isinstance(result, (list, tuple)): # list or tuple result = [x.numpy() for x in result] else: # single result result = [result.numpy()] # now make the eager functions a graph concrete_func = tf.function(func, input_signature=tuple(input_tensors)) concrete_func = concrete_func.get_concrete_function() graph_def = from_function(concrete_func, input_names=list(feed_dict.keys()), output_names=outputs, large_model=large_model) initialized_tables = None else: # # use graph to execute the tensorflow func # with tf_session() as sess: tf_set_random_seed(1) input_list = [] if not premade_placeholders: for k, v in clean_feed_dict.items(): input_list.append( tf_placeholder(name=k, shape=v.shape, dtype=tf.as_dtype(v.dtype))) func(*input_list) variables_lib.global_variables_initializer().run() tf_tables_initializer().run() output_dict = [] for out_name in outputs: output_dict.append(sess.graph.get_tensor_by_name(out_name)) result = sess.run(output_dict, feed_dict=feed_dict) graph_def = freeze_session(sess, input_names=list(feed_dict.keys()), output_names=outputs) table_names, key_dtypes, value_dtypes = get_hash_table_info( graph_def) initialized_tables = {} for n, k_dtype, val_dtype in zip(table_names, key_dtypes, value_dtypes): h = lookup_ops.hash_table_v2(k_dtype, val_dtype, shared_name=n) k, v = lookup_ops.lookup_table_export_v2( h, k_dtype, val_dtype) initialized_tables[n] = (sess.run(k), sess.run(v)) tf_reset_default_graph() with tf_session() as sess: tf.import_graph_def(graph_def, name='') graph_def = tf_optimize(list(feed_dict.keys()), outputs, graph_def, fold_constant=constant_fold) model_path = os.path.join( self.test_data_directory, self._testMethodName + "_after_tf_optimize.pb") utils.save_protobuf(model_path, graph_def) self.logger.debug("created file %s", model_path) return result, graph_def, initialized_tables
def testRunStepsWithOutputContext(self, distribution, optimizer_fn, is_tpu): with distribution.scope(): def dataset_fn(): dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() # TODO(priyag): batch with drop_remainder=True causes shapes to be # fully defined for TPU. Remove this when XLA supports dynamic shapes. return dataset.batch(batch_size=1, drop_remainder=True) optimizer = optimizer_fn() layer = core.Dense(1, use_bias=True) key1 = "foo" value1 = "bar" def model_fn(output_context, x): """A very simple model written by the user.""" def loss_fn(): y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) return y * y train_op = optimizer.minimize(loss_fn) loss = loss_fn() output_context.set_last_step_output( name="tower_loss_agg", output=loss, aggregation=variables_lib.VariableAggregation.MEAN) output_context.set_non_tensor_output(key1, value1) return (train_op, loss) def step_fn(output_context, inputs): (train_op, loss) = distribution.call_for_each_tower( model_fn, output_context, inputs, run_concurrently=False) output_context.set_last_step_output( name="cross_tower_loss_agg", output=loss, aggregation=variables_lib.VariableAggregation.MEAN) output_context.set_last_step_output( name="cross_tower_loss_noagg", output=loss) return distribution.group(train_op) iterator = distribution.distribute_dataset( dataset_fn).make_one_shot_iterator() def run_step(): initial_loss = lambda: constant_op.constant(1e7) # Initial values corresponding to aggregated losses are just single # tensors. But for non aggregated losses, we need to have initial # values that are of the same structure as non reduced losses. In # MirroredStrategy, this will be a list of losses, in TPUStrategy # it will be single tensor. Using `broadcast` followed by `unwrap` # gives us the desired initial value structure. initial_loop_values = { "tower_loss_agg": initial_loss(), "cross_tower_loss_agg": initial_loss(), "cross_tower_loss_noagg": distribution.unwrap(distribution.broadcast(initial_loss())) } ctx = distribution.run_steps_on_dataset( step_fn, iterator, iterations=2, initial_loop_values=initial_loop_values) self.assertEqual({key1: [value1]}, ctx.non_tensor_outputs) self._verify_loss_output( initial_loss(), loss_output=ctx.last_step_outputs["tower_loss_agg"], aggregated=True, distribution=distribution) self._verify_loss_output( initial_loss(), loss_output=ctx.last_step_outputs["cross_tower_loss_agg"], aggregated=True, distribution=distribution) self._verify_loss_output( initial_loss(), loss_output=ctx.last_step_outputs["cross_tower_loss_noagg"], aggregated=False, distribution=distribution) return (ctx.run_op, ctx.last_step_outputs["tower_loss_agg"]) self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.test_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) weights, biases, losses = [], [], [] for _ in range(5): _, loss = run_step() losses.append(loss) weights.append(self.evaluate(layer.kernel)) biases.append(self.evaluate(layer.bias)) self.evaluate(distribution.finalize()) loss_is_not_increasing = all(y <= x for x, y in zip(losses, losses[1:])) self.assertTrue(loss_is_not_increasing) error = abs( numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) error_is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) self.assertTrue(error_is_not_increasing)
def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction, use_callable_loss): with distribution.scope(): all_vars = [] def model_fn(x, y): def loss_fn(): # Use fixed initialization to make the steps deterministic. w = variable_scope.get_variable("w", initializer=[[2.]]) all_vars.append(w) predict = math_ops.matmul(x, w) return losses_impl.mean_squared_error( y, predict, reduction=loss_reduction) optimizer = optimizer_fn() # GradientDescent with 0.2 learning rate if use_callable_loss: return optimizer.minimize(loss_fn) else: return optimizer.minimize(loss_fn()) def dataset_fn(): features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) return dataset_ops.Dataset.zip((features, labels)).repeat() def step_fn(ctx, inputs): del ctx # Unused x, y = inputs return distribution.group( distribution.call_for_each_tower( model_fn, x, y, run_concurrently=False)) iterator = distribution.distribute_dataset( dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( step_fn, iterator, iterations=1).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.test_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) run_step() v = all_vars[0] self.assertTrue(all([v is vi for vi in all_vars[1:]])) weight = numpy.squeeze(self.evaluate(v)) # Our model is: # predict = x * w # loss = (predict - y)^2 # dloss/dpredict = 2*(predict - y) # dloss/dw = 2 * x^T @ (predict - y) # For our batch size of 2, assuming sum loss reduction: # x = [2, 7] # y = [6, 21] # w_initial = 2 # predict = [4, 14] # predict - y = [-2, -7] # dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106 # So unreplicated the update to w with lr=0.2 is -0.2 * -106 = 21.2 # with sum loss reduction, or 10.6 with mean. if loss_reduction == losses_impl.Reduction.SUM: # Note that the "distribution.num_towers" factor will go away once # we split the input across towers, instead of pulling a complete # batch of input per tower. self.assertNear(weight, 2 + 21.2 * distribution.num_towers, 0.0001) else: # One of the mean loss reductions. self.assertNear(weight, 2 + 10.6, 0.0001) self.evaluate(distribution.finalize())
def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn, momentum, renorm, update_ops_in_cross_tower_mode): """Verifies that moving mean updates are reduced across towers.""" with distribution.scope(): num_towers = len(distribution.worker_devices) model_fn, dataset_fn, batchnorm = batchnorm_example( optimizer_fn, batch_per_epoch=num_towers, momentum=momentum, renorm=renorm, update_ops_in_tower_mode=not update_ops_in_cross_tower_mode) # Make sure prefetching is disabled since that makes the # specific input on each device to be non deterministic, and # this test relies on specific input being on each device. if isinstance(distribution, mirrored_strategy.MirroredStrategy): self.assertFalse(distribution._prefetch_on_device) def step_fn(ctx, inputs): del ctx # Unused fetches = distribution.unwrap( distribution.call_for_each_tower( model_fn, inputs, run_concurrently=batchnorm.built)) if update_ops_in_cross_tower_mode: fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS) return control_flow_ops.group(fetches) iterator = distribution.distribute_dataset( dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( step_fn, iterator, iterations=1).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.test_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) expected_moving_means = [0.] * 8 def averaged_batch_mean(i): # Each batch has shape [16, 8] where the ith element in jth list is # (8 * j + i + tower_id * 100). So the batch mean in each tower is # (60 + i + tower_id * 100). So here comes its batch mean over all # towers: return 60. + i + (num_towers - 1.) / 2. * 100. for _ in range(10): run_step() moving_means = self.evaluate(batchnorm.moving_mean) # We make sure that the moving_mean is updated as if the sample mean is # calculated over all towers. for i, expected_moving_mean in enumerate(expected_moving_means): expected_moving_means[i] -= (( expected_moving_mean - averaged_batch_mean(i)) * (1.0 - momentum)) self.assertNear(expected_moving_means[i], moving_means[i], 0.0001) self.evaluate(distribution.finalize())
def testOptimizerInsideModelFn(self, distribution, optimizer_fn): created_variables = [] trainable_variables = [] def appending_creator(next_creator, *args, **kwargs): v = next_creator(*args, **kwargs) created_variables.append(v.name) if "trainable" in kwargs and kwargs["trainable"]: trainable_variables.append(v.name) return v # Creator scope needs to be set before it's used inside # `distribution.scope`. with variable_scope.variable_creator_scope( appending_creator), distribution.scope(): model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=True, create_optimizer_inside_model_fn=True) def step_fn(ctx, inputs): del ctx # Unused return distribution.group( distribution.call_for_each_tower( model_fn, inputs, run_concurrently=layer.built)) iterator = distribution.distribute_dataset( dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( step_fn, iterator, iterations=1).run_op self.evaluate(distribution.initialize()) if not context.executing_eagerly(): with self.test_session() as sess: run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) run_step() self.evaluate(distribution.finalize()) def get_expected_variables(optimizer_fn, num_parameter_devices): variables_map = { "GradientDescent": ["dense/kernel", "dense/bias"], "Adam": [ "dense/kernel", "dense/bias", "beta1_power", "beta2_power", "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam", "dense/bias/Adam_1" ] } variables = variables_map[optimizer_fn().get_name()] variables.extend([ v + "/replica_{}".format(replica) for v in variables for replica in range(1, num_parameter_devices) ]) return set([v + ":0" for v in variables]) self.assertEqual( get_expected_variables(optimizer_fn, len(distribution.parameter_devices)), set(created_variables))
def _testStackBidirectionalDynamicRNNStates(self, use_gpu): # Check that the states are correctly initialized. # - Create a net and iterate for 3 states. Keep the state (state_3). # - Reset states, and iterate for 5 steps. Last state is state_5. # - Reset the sets to state_3 and iterate for 2 more steps, # last state will be state_5'. # - Check that the state_5 and state_5' (forward and backward) are the # same for the first layer (it does not apply for the second layer since # it has forward-backward dependencies). with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: batch_size = 2 # Create states placeholders. initial_states_fw = [ array_ops.placeholder(dtypes.float32, shape=(batch_size, layer * 2)) for layer in self.layers ] initial_states_bw = [ array_ops.placeholder(dtypes.float32, shape=(batch_size, layer * 2)) for layer in self.layers ] # Create the net input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createStackBidirectionalDynamicRNN( use_gpu, use_shape=True, use_state_tuple=False, initial_states_fw=initial_states_fw, initial_states_bw=initial_states_bw)) variables.global_variables_initializer().run() # Run 3 steps. feed_dict = {inputs[0]: input_value, sequence_length: [3, 2]} # Initialize to empty state. for i, layer in enumerate(self.layers): feed_dict[initial_states_fw[i]] = np.zeros( (batch_size, layer * 2), dtype=np.float32) feed_dict[initial_states_bw[i]] = np.zeros( (batch_size, layer * 2), dtype=np.float32) _, st_3_fw, st_3_bw = sess.run([outputs, state_fw, state_bw], feed_dict=feed_dict) # Reset the net and run 5 steps. feed_dict = {inputs[0]: input_value, sequence_length: [5, 3]} for i, layer in enumerate(self.layers): feed_dict[initial_states_fw[i]] = np.zeros( (batch_size, layer * 2), dtype=np.float32) feed_dict[initial_states_bw[i]] = np.zeros( (batch_size, layer * 2), dtype=np.float32) _, st_5_fw, st_5_bw = sess.run([outputs, state_fw, state_bw], feed_dict=feed_dict) # Reset the net to state_3 and run 2 more steps. feed_dict = {inputs[0]: input_value, sequence_length: [2, 1]} for i, _ in enumerate(self.layers): feed_dict[initial_states_fw[i]] = st_3_fw[i] feed_dict[initial_states_bw[i]] = st_3_bw[i] out_5p, st_5p_fw, st_5p_bw = sess.run( [outputs, state_fw, state_bw], feed_dict=feed_dict) # Check that the 3+2 and 5 first layer states. self.assertAllEqual(st_5_fw[0], st_5p_fw[0]) self.assertAllEqual(st_5_bw[0], st_5p_bw[0])
def testOperatorOverload(self): v = resource_variable_ops.ResourceVariable(1.0, name="var0") self.evaluate(variables.global_variables_initializer()) self.assertEqual(2.0, self.evaluate(v + v))
def _test_minimize_loss_graph(self, task_type, task_id, num_gpus): d, master_target, config = self._get_test_object(task_type, task_id, num_gpus) with ops.Graph().as_default(), \ self.cached_session(config=config, target=master_target) as sess, \ d.scope(): initializer = functools.partial( init_ops_v2.GlorotUniform(), (1, 1), dtype=dtypes.float32) kernel = variables.Variable( initial_value=initializer, name='gpu_%d/kernel' % d.extended._num_gpus_per_worker, trainable=True) def loss_fn(x): y = array_ops.reshape( gen_math_ops.mat_mul(x, kernel), []) - constant_op.constant(1.) return y * y # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for # multiple graphs (b/111216820). def grad_fn(x): loss = loss_fn(x) var_list = ( variables.trainable_variables() + ops.get_collection( ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) grads = gradients.gradients(loss, var_list) ret = list(zip(grads, var_list)) return ret def update(v, g): return v.assign_sub(0.05 * g, use_locking=True) one = constant_op.constant([[1.]]) def step(): """Perform one optimization step.""" # Run forward & backward to get gradients, variables list. g_v = d.extended.call_for_each_replica(grad_fn, args=[one]) # Update the variables using the gradients and the update() function. before_list = [] after_list = [] for g, v in g_v: fetched = d.extended.read_var(v) before_list.append(fetched) with ops.control_dependencies([fetched]): # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.extended.reduce_to( reduce_util.ReduceOp.SUM, g, destinations=v) with ops.control_dependencies( d.extended.update(v, update, args=(g,), group=False)): after_list.append(d.extended.read_var(v)) return before_list, after_list before_out, after_out = step() if context.num_gpus() < d.extended._num_gpus_per_worker: return True sess.run(variables.global_variables_initializer()) for i in range(10): b, a = sess.run((before_out, after_out)) if i == 0: before, = b after, = a error_before = abs(before - 1) error_after = abs(after - 1) # Error should go down self.assertLess(error_after, error_before)
def testStridedSliceAssign(self): v = resource_variable_ops.ResourceVariable([1.0, 2.0]) self.evaluate(variables.global_variables_initializer()) self.evaluate(v[0].assign(2.0)) self.assertAllEqual(self.evaluate(v), [2.0, 2.0])
def testLoad(self): v = resource_variable_ops.ResourceVariable(1.0, name="var0") self.evaluate(variables.global_variables_initializer()) v.load(2.0) self.assertEqual(2.0, self.evaluate(v.value()))
def _test_simple_increment(self, task_type, task_id, num_gpus): d, master_target, sess_config = self._get_test_objects( task_type, task_id, num_gpus) if d.extended._cluster_spec: num_workers = len(d.extended._cluster_spec.as_dict().get(WORKER)) if 'chief' in d.extended._cluster_spec.as_dict(): num_workers += 1 else: num_workers = 1 with ops.Graph().as_default(), \ self.cached_session(target=master_target, config=sess_config) as sess, \ d.scope(): def model_fn(): x = variable_scope.get_variable( 'x', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) y = variable_scope.get_variable( 'y', initializer=20.0, aggregation=variable_scope.VariableAggregation.SUM) z = variable_scope.get_variable( 'z', initializer=30.0, aggregation=variable_scope.VariableAggregation. ONLY_FIRST_REPLICA) # We explicitly make a constant tensor here to avoid complaints about # summing non-distributed values. one = constant_op.constant(1.0) x_add = x.assign_add(one, use_locking=True) y_add = y.assign_add(one, use_locking=True) z_add = z.assign_add(one, use_locking=True) train_op = control_flow_ops.group(x_add, y_add, z_add) return x, y, z, train_op x, y, z, train_op = d.extended.call_for_each_replica(model_fn) train_op = d.group(train_op) if task_id == 0: self.evaluate(variables.global_variables_initializer()) # Workers waiting for chief worker's initializing variables. self._init_condition.acquire() self._init_reached += 1 while self._init_reached != num_workers: self._init_condition.wait() self._init_condition.notify_all() self._init_condition.release() sess.run(train_op) # Wait for other workers to finish training. self._finish_condition.acquire() self._finish_reached += 1 while self._finish_reached != num_workers: self._finish_condition.wait() self._finish_condition.notify_all() self._finish_condition.release() x_val, y_val, z_val = sess.run([x, y, z]) self.assertEqual(x_val, 10.0 + 1.0 * num_workers * d.num_replicas_in_sync) self.assertEqual(y_val, 20.0 + 1.0 * num_workers * d.num_replicas_in_sync) self.assertEqual(z_val, 30.0 + 1.0 * num_workers)
def testInitializeAllVariables(self): v = resource_variable_ops.ResourceVariable(1, dtype=dtypes.float32, name="var0") self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(v.value()))
def _test_device_assignment_distributed(self, task_type, task_id, num_gpus): worker_device = '/job:%s/replica:0/task:%d' % (task_type, task_id) d, _, sess_config = self._get_test_objects(task_type, task_id, num_gpus) with ops.Graph().as_default(), \ self.cached_session(target=self._default_target, config=sess_config) as sess, \ d.scope(): # Define a variable outside the call_for_each_replica scope. n = variable_scope.get_variable('n', initializer=10.0) self.assertEqual(n.device, '/job:ps/task:0') def model_fn(): if num_gpus == 0: last_part_device = 'device:CPU:0' else: replica_id = _get_replica_id_integer() last_part_device = ('device:GPU:%d' % replica_id) a = constant_op.constant(1.0) b = constant_op.constant(2.0) c = a + b self.assertEqual(a.device, worker_device + '/' + last_part_device) self.assertEqual(b.device, worker_device + '/' + last_part_device) self.assertEqual(c.device, worker_device + '/' + last_part_device) # The device scope is ignored for variables but not for normal ops. with ops.device('/job:worker/task:0'): x = variable_scope.get_variable( 'x', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) x_add = x.assign_add(c) e = a + c # The variable x is on the task 1 since the device_function has been # called once before the model_fn. self.assertEqual(x.device, '/job:ps/task:1') self.assertEqual(x_add.device, x.device) self.assertEqual( e.device, '/job:worker/replica:0/task:0/%s' % last_part_device) # The colocate_vars_with can override the distribution's device. with d.extended.colocate_vars_with(x): y = variable_scope.get_variable( 'y', initializer=20.0, aggregation=variable_scope.VariableAggregation.SUM) # We add an identity here to avoid complaints about summing # non-distributed values. y_add = y.assign_add(array_ops.identity(x_add)) self.assertEqual(y.device, '/job:ps/task:1') self.assertEqual(y_add.device, y.device) self.assertEqual(y.device, x.device) z = variable_scope.get_variable( 'z', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) self.assertEqual(z.device, '/job:ps/task:0') self.assertNotEqual(z.device, x.device) with ops.control_dependencies([y_add]): # We add an identity here to avoid complaints about summing # non-distributed values. z_add = z.assign_add(array_ops.identity(y)) with ops.control_dependencies([z_add]): f = z + c self.assertEqual(f.device, worker_device + '/' + last_part_device) # The device scope would merge with the default worker device. with ops.device('/CPU:1'): g = e + 1.0 self.assertEqual(g.device, worker_device + '/device:CPU:1') # Ths ops.colocate_with will be ignored when defining a variable but not # for a normal tensor. with ops.colocate_with(x): u = variable_scope.get_variable('u', initializer=30.0) v = variable_scope.get_variable('v', initializer=30.0) h = f + 1.0 self.assertIn('/job:ps/', u.device) self.assertIn('/job:ps/', v.device) # u and v are on different parameter servers. self.assertTrue(u.device != x.device or v.device != x.device) self.assertTrue(u.device == x.device or v.device == x.device) # Here h is not on one worker. Note h.device is canonical while x.device # is not but. self.assertIn('/job:ps/', h.device) return y_add, z_add, f y, z, f = d.extended.call_for_each_replica(model_fn) self.assertNotEqual(y, None) self.assertNotEqual(z, None) self.assertNotEqual(f, None) if context.num_gpus() >= 1 and num_gpus <= 1: self.evaluate(variables.global_variables_initializer()) y_val, z_val, f_val = sess.run([y, z, f]) self.assertEqual(y_val, 33.0) self.assertEqual(z_val, 43.0) self.assertEqual(f_val, 46.0)
def _test_minimize_loss_graph(self, task_type, task_id, num_gpus): d, master_target, sess_config = self._get_test_objects( task_type, task_id, num_gpus) if task_type: # Multi-worker assert hasattr(d.extended, '_cluster_spec') and d.extended._cluster_spec num_workers = len(d.extended._cluster_spec.as_dict().get(WORKER)) if CHIEF in d.extended._cluster_spec.as_dict(): num_workers += 1 else: # local num_workers = 1 with ops.Graph().as_default(), \ self.cached_session(target=master_target, config=sess_config) as sess, \ d.scope(): kernel = strategy_test_lib.create_variable_like_keras_layer( 'kernel', (1, 1), dtypes.float32, ) def loss_fn(x): y = array_ops.reshape(math_ops.matmul(x, kernel), []) - constant_op.constant(1.) return y * y # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for # multiple graphs (b/111216820). def grad_fn(x): loss = loss_fn(x) var_list = (variables.trainable_variables() + ops.get_collection( ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) grads = gradients.gradients(loss, var_list) ret = list(zip(grads, var_list)) return ret def update(v, g): return v.assign_sub(0.05 * g, use_locking=True) one = constant_op.constant([[1.]]) def step(): """Perform one optimization step.""" # Run forward & backward to get gradients, variables list. g_v = d.extended.call_for_each_replica(grad_fn, args=(one, )) # Update the variables using the gradients and the update() function. before_list = [] after_list = [] for g, v in g_v: fetched = d.extended.read_var(v) before_list.append(fetched) with ops.control_dependencies([fetched]): # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.extended.reduce_to(reduce_util.ReduceOp.SUM, g, destinations=v) with ops.control_dependencies( d.extended.update(v, update, args=(g, ), group=False)): after_list.append(d.extended.read_var(v)) return before_list, after_list before_out, after_out = step() if (not task_type or multi_worker_util.is_chief( d.extended._cluster_spec, task_type, task_id)): self.evaluate(variables.global_variables_initializer()) # Workers waiting for chief worker's initializing variables. self._init_condition.acquire() self._init_reached += 1 while self._init_reached != num_workers: self._init_condition.wait() self._init_condition.notify_all() self._init_condition.release() for i in range(10): b, a = sess.run((before_out, after_out)) if i == 0: before, = b after, = a error_before = abs(before - 1) error_after = abs(after - 1) # Error should go down self.assertLess(error_after, error_before)
def testDense(self): for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS: with test_util.use_gpu(): # Initialize variables for numpy implementation. var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype) var0 = resource_variable_ops.ResourceVariable(var0_np, dtype=dtype) var1 = resource_variable_ops.ResourceVariable(var1_np, dtype=dtype) grads0 = constant_op.constant(grads0_np, dtype=dtype) grads1 = constant_op.constant(grads1_np, dtype=dtype) opt = rmsprop.RMSprop(learning_rate=learning_rate, rho=rho, momentum=momentum, epsilon=epsilon, centered=centered) update = opt.apply_gradients( zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) if centered: mg0 = opt.get_slot(var0, "mg") mg1 = opt.get_slot(var1, "mg") else: mg0 = None mg1 = None rms0 = opt.get_slot(var0, "rms") self.assertTrue(rms0 is not None) rms1 = opt.get_slot(var1, "rms") self.assertTrue(rms1 is not None) mom0 = opt.get_slot(var0, "momentum") self.assertTrue(mom0 is not None) mom1 = opt.get_slot(var1, "momentum") self.assertTrue(mom1 is not None) mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Run 4 steps of RMSprop for _ in range(1, 5): self.evaluate(update) var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate, rho, momentum, epsilon, centered) var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate, rho, momentum, epsilon, centered) # Validate updated params if centered: self.assertAllCloseAccordingToType( mg0_np, self.evaluate(mg0)) self.assertAllCloseAccordingToType( mg1_np, self.evaluate(mg1)) self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0)) self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1)) self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def _test_device_assignment_local(self, d, compute_device='CPU', variable_device='CPU', num_gpus=0): with ops.Graph().as_default(), \ self.cached_session(target=self._default_target, config=self._sess_config) as sess, \ d.scope(): def model_fn(): if 'CPU' in compute_device: replica_compute_device = '/device:CPU:0' else: replica_id = _get_replica_id_integer() replica_compute_device = ('/device:GPU:%d' % replica_id) replica_compute_device = device_util.canonicalize( replica_compute_device) if 'CPU' in variable_device: replica_variable_device = '/device:CPU:0' else: replica_id = _get_replica_id_integer() replica_variable_device = ('/device:GPU:%d' % replica_id) replica_variable_device = device_util.canonicalize( replica_variable_device) a = constant_op.constant(1.0) b = constant_op.constant(2.0) c = a + b self.assertEqual(a.device, replica_compute_device) self.assertEqual(b.device, replica_compute_device) self.assertEqual(c.device, replica_compute_device) # The device scope is ignored for variables but not for normal ops. with ops.device('/device:GPU:2'): x = variable_scope.get_variable( 'x', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) x_add = x.assign_add(c) e = a + c self.assertEqual(device_util.canonicalize(x.device), replica_variable_device) self.assertEqual(x_add.device, x.device) self.assertEqual(e.device, device_util.canonicalize('/device:GPU:2')) # The colocate_vars_with can override the distribution's device. with d.extended.colocate_vars_with(x): y = variable_scope.get_variable( 'y', initializer=20.0, aggregation=variable_scope.VariableAggregation.SUM) # We add an identity here to avoid complaints about summing # non-distributed values. y_add = y.assign_add(array_ops.identity(x_add)) self.assertEqual(device_util.canonicalize(y.device), replica_variable_device) self.assertEqual(y_add.device, y.device) self.assertEqual(y.device, x.device) z = variable_scope.get_variable( 'z', initializer=10.0, aggregation=variable_scope.VariableAggregation.SUM) self.assertEqual(device_util.canonicalize(z.device), replica_variable_device) with ops.control_dependencies([y_add]): # We add an identity here to avoid complaints about summing # non-distributed values. z_add = z.assign_add(array_ops.identity(y)) with ops.control_dependencies([z_add]): f = z + c self.assertEqual(f.device, replica_compute_device) # The device scope would merge with the default worker device. with ops.device('/CPU:1'): g = e + 1.0 self.assertEqual(g.device, device_util.canonicalize('/device:CPU:1')) # Ths ops.colocate_with will be ignored when defining a variable but not # for a normal tensor. with ops.colocate_with(x): u = variable_scope.get_variable('u', initializer=30.0) h = f + 1.0 self.assertEqual(device_util.canonicalize(u.device), replica_variable_device) self.assertEqual(device_util.canonicalize(x.device), device_util.canonicalize(h.device)) return y_add, z_add, f y, z, f = d.extended.call_for_each_replica(model_fn) self.assertNotEqual(y, None) self.assertNotEqual(z, None) self.assertNotEqual(f, None) if context.num_gpus() >= 1 and num_gpus <= 1: self.evaluate(variables.global_variables_initializer()) y_val, z_val, f_val = sess.run([y, z, f]) self.assertEqual(y_val, 33.0) self.assertEqual(z_val, 43.0) self.assertEqual(f_val, 46.0)
def _testFreezeGraph(self, saver_write_version): checkpoint_prefix = os.path.join(self.get_temp_dir(), "saved_checkpoint") checkpoint_meta_graph_file = os.path.join(self.get_temp_dir(), "saved_checkpoint.meta") checkpoint_state_name = "checkpoint_state" input_graph_name = "input_graph.pb" output_graph_name = "output_graph.pb" # We'll create an input graph that has a single variable containing 1.0, # and that then multiplies it by 2. with ops.Graph().as_default(): variable_node = variables.VariableV1(1.0, name="variable_node") output_node = math_ops.multiply(variable_node, 2.0, name="output_node") sess = session.Session() init = variables.global_variables_initializer() sess.run(init) output = sess.run(output_node) self.assertNear(2.0, output, 0.00001) saver = saver_lib.Saver(write_version=saver_write_version) checkpoint_path = saver.save(sess, checkpoint_prefix, global_step=0, latest_filename=checkpoint_state_name) graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name) # We save out the graph to disk, and then call the const conversion # routine. input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name) input_saver_def_path = "" input_binary = False output_node_names = "output_node" restore_op_name = "save/restore_all" filename_tensor_name = "save/Const:0" output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name) clear_devices = False input_meta_graph = checkpoint_meta_graph_file freeze_graph.freeze_graph(input_graph_path, input_saver_def_path, input_binary, checkpoint_path, output_node_names, restore_op_name, filename_tensor_name, output_graph_path, clear_devices, "", "", input_meta_graph, checkpoint_version=saver_write_version) # Now we make sure the variable is now a constant, and that the graph still # produces the expected result. with ops.Graph().as_default(): output_graph_def = graph_pb2.GraphDef() with open(output_graph_path, "rb") as f: output_graph_def.ParseFromString(f.read()) _ = importer.import_graph_def(output_graph_def, name="") self.assertEqual(4, len(output_graph_def.node)) for node in output_graph_def.node: self.assertNotEqual("VariableV2", node.op) self.assertNotEqual("Variable", node.op) with session.Session() as sess: output_node = sess.graph.get_tensor_by_name("output_node:0") output = sess.run(output_node) self.assertNear(2.0, output, 0.00001)
def test_savedmodel_state_override(self): random_model = RandomStateSpaceModel( state_dimension=5, state_noise_dimension=4, configuration=state_space_model.StateSpaceModelConfiguration( exogenous_feature_columns=[ layers.real_valued_column("exogenous") ], dtype=dtypes.float64, num_features=1)) estimator = estimators.StateSpaceRegressor( model=random_model, optimizer=gradient_descent.GradientDescentOptimizer(0.1)) combined_input_fn = input_pipeline.WholeDatasetInputFn( input_pipeline.NumpyReader({ feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4], feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.], "exogenous": [-1., -2., -3., -4.] })) estimator.train(combined_input_fn, steps=1) export_location = estimator.export_saved_model( self.get_temp_dir(), estimator.build_raw_serving_input_receiver_fn()) with ops.Graph().as_default() as graph: random_model.initialize_graph() with self.session(graph=graph) as session: variables.global_variables_initializer().run() evaled_start_state = session.run( random_model.get_start_state()) evaled_start_state = [ state_element[None, ...] for state_element in evaled_start_state ] with ops.Graph().as_default() as graph: with self.session(graph=graph) as session: signatures = loader.load(session, [tag_constants.SERVING], export_location) first_split_filtering = saved_model_utils.filter_continuation( continue_from={ feature_keys.FilteringResults.STATE_TUPLE: evaled_start_state }, signatures=signatures, session=session, features={ feature_keys.FilteringFeatures.TIMES: [1, 2], feature_keys.FilteringFeatures.VALUES: [1., 2.], "exogenous": [[-1.], [-2.]] }) second_split_filtering = saved_model_utils.filter_continuation( continue_from=first_split_filtering, signatures=signatures, session=session, features={ feature_keys.FilteringFeatures.TIMES: [3, 4], feature_keys.FilteringFeatures.VALUES: [3., 4.], "exogenous": [[-3.], [-4.]] }) combined_filtering = saved_model_utils.filter_continuation( continue_from={ feature_keys.FilteringResults.STATE_TUPLE: evaled_start_state }, signatures=signatures, session=session, features={ feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4], feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.], "exogenous": [[-1.], [-2.], [-3.], [-4.]] }) split_predict = saved_model_utils.predict_continuation( continue_from=second_split_filtering, signatures=signatures, session=session, steps=1, exogenous_features={"exogenous": [[[-5.]]]}) combined_predict = saved_model_utils.predict_continuation( continue_from=combined_filtering, signatures=signatures, session=session, steps=1, exogenous_features={"exogenous": [[[-5.]]]}) for state_key, combined_state_value in combined_filtering.items(): if state_key == feature_keys.FilteringResults.TIMES: continue self.assertAllClose(combined_state_value, second_split_filtering[state_key]) for prediction_key, combined_value in combined_predict.items(): self.assertAllClose(combined_value, split_predict[prediction_key])
def test_traing_save_restore(self): opt = de.DynamicEmbeddingOptimizer(adam.AdamOptimizer(0.3)) id = 0 if test_util.is_gpu_available(): dim_list = [1, 2, 4, 8, 10, 16, 32, 64, 100, 256, 500] else: dim_list = [10] for key_dtype, value_dtype, dim, step in itertools.product( [dtypes.int64], [dtypes.float32], dim_list, [10], ): id += 1 save_dir = os.path.join(self.get_temp_dir(), "save_restore") save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash") ids = script_ops.py_func(_create_dynamic_shape_tensor(), inp=[], Tout=key_dtype, stateful=True) params = de.get_variable( name="params-test-0915-" + str(id), key_dtype=key_dtype, value_dtype=value_dtype, initializer=init_ops.random_normal_initializer(0.0, 0.01), dim=dim, ) _, var0 = de.embedding_lookup(params, ids, return_trainable=True) def loss(): return var0 * var0 params_keys, params_vals = params.export() mini = opt.minimize(loss, var_list=[var0]) opt_slots = [opt.get_slot(var0, _s) for _s in opt.get_slot_names()] _saver = saver.Saver([params] + [_s.params for _s in opt_slots]) with self.session(config=default_config, use_gpu=test_util.is_gpu_available()) as sess: self.evaluate(variables.global_variables_initializer()) for _i in range(step): self.evaluate([mini]) size_before_saved = self.evaluate(params.size()) np_params_keys_before_saved = self.evaluate(params_keys) np_params_vals_before_saved = self.evaluate(params_vals) opt_slots_kv_pairs = [_s.params.export() for _s in opt_slots] np_slots_kv_pairs_before_saved = [ self.evaluate(_kv) for _kv in opt_slots_kv_pairs ] _saver.save(sess, save_path) with self.session(config=default_config, use_gpu=test_util.is_gpu_available()) as sess: self.evaluate(variables.global_variables_initializer()) self.assertAllEqual(0, self.evaluate(params.size())) _saver.restore(sess, save_path) params_keys_restored, params_vals_restored = params.export() size_after_restored = self.evaluate(params.size()) np_params_keys_after_restored = self.evaluate( params_keys_restored) np_params_vals_after_restored = self.evaluate( params_vals_restored) opt_slots_kv_pairs_restored = [ _s.params.export() for _s in opt_slots ] np_slots_kv_pairs_after_restored = [ self.evaluate(_kv) for _kv in opt_slots_kv_pairs_restored ] self.assertAllEqual(size_before_saved, size_after_restored) self.assertAllEqual( np.sort(np_params_keys_before_saved), np.sort(np_params_keys_after_restored), ) self.assertAllEqual( np.sort(np_params_vals_before_saved, axis=0), np.sort(np_params_vals_after_restored, axis=0), ) for pairs_before, pairs_after in zip( np_slots_kv_pairs_before_saved, np_slots_kv_pairs_after_restored): self.assertAllEqual( np.sort(pairs_before[0], axis=0), np.sort(pairs_after[0], axis=0), ) self.assertAllEqual( np.sort(pairs_before[1], axis=0), np.sort(pairs_after[1], axis=0), ) if test_util.is_gpu_available(): self.assertTrue( "GPU" in params.tables[0].resource_handle.device)
def testDenseWithLearningRateDecay(self): var0_np = np.array([1.0, 2.0]) grads0_np = np.array([0.1, 0.2]) var1_np = np.array([3.0, 4.0]) grads1_np = np.array([0.01, 0.2]) var0 = resource_variable_ops.ResourceVariable(var0_np) var1 = resource_variable_ops.ResourceVariable(var1_np) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) learning_rate = 0.01 rho = 0.9 momentum = 0.0 epsilon = 1e-7 centered = False decay = 0.5 opt = rmsprop.RMSprop(learning_rate=learning_rate, rho=rho, momentum=momentum, epsilon=epsilon, centered=centered, decay=decay) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) rms0 = opt.get_slot(var0, "rms") self.assertTrue(rms0 is not None) rms1 = opt.get_slot(var1, "rms") self.assertTrue(rms1 is not None) mom0 = opt.get_slot(var0, "momentum") self.assertTrue(mom0 is not None) mom1 = opt.get_slot(var1, "momentum") self.assertTrue(mom1 is not None) mg0_np = np.array([0.0, 0.0]) mg1_np = np.array([0.0, 0.0]) rms0_np = np.array([0.0, 0.0]) rms1_np = np.array([0.0, 0.0]) mom0_np = np.array([0.0, 0.0]) mom1_np = np.array([0.0, 0.0]) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) # Run 4 steps of RMSprop for t in range(2): self.evaluate(update) lr = learning_rate / (1 + decay * t) var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho, momentum, epsilon, centered) var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho, momentum, epsilon, centered) # Validate updated params self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0)) self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1)) self.assertAllCloseAccordingToType(mom0_np, self.evaluate(mom0)) self.assertAllCloseAccordingToType(mom1_np, self.evaluate(mom1)) self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def test_save_restore(self): save_dir = os.path.join(self.get_temp_dir(), "save_restore") save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash") with self.session(config=default_config, graph=ops.Graph()) as sess: v0 = variables.Variable(10.0, name="v0") v1 = variables.Variable(20.0, name="v1") keys = constant_op.constant([0, 1, 2], dtypes.int64) values = constant_op.constant([[0.0], [1.0], [2.0]], dtypes.float32) table = de.Variable( key_dtype=dtypes.int64, value_dtype=dtypes.float32, initializer=-1.0, name="t1", dim=1, ) save = saver.Saver(var_list=[v0, v1, table]) self.evaluate(variables.global_variables_initializer()) # Check that the parameter nodes have been initialized. self.assertEqual(10.0, self.evaluate(v0)) self.assertEqual(20.0, self.evaluate(v1)) self.assertAllEqual(0, self.evaluate(table.size())) self.evaluate(table.upsert(keys, values)) self.assertAllEqual(3, self.evaluate(table.size())) val = save.save(sess, save_path) self.assertIsInstance(val, six.string_types) self.assertEqual(save_path, val) del table with self.session(config=default_config, graph=ops.Graph()) as sess: v0 = variables.Variable(-1.0, name="v0") v1 = variables.Variable(-1.0, name="v1") table = de.Variable( name="t1", key_dtype=dtypes.int64, value_dtype=dtypes.float32, initializer=-1.0, dim=1, checkpoint=True, ) self.evaluate( table.upsert( constant_op.constant([0, 1], dtypes.int64), constant_op.constant([[12.0], [24.0]], dtypes.float32), )) size_op = table.size() self.assertAllEqual(2, self.evaluate(size_op)) save = saver.Saver(var_list=[v0, v1, table]) # Restore the saved values in the parameter nodes. save.restore(sess, save_path) # Check that the parameter nodes have been restored. self.assertEqual([10.0], self.evaluate(v0)) self.assertEqual([20.0], self.evaluate(v1)) self.assertAllEqual(3, self.evaluate(table.size())) remove_keys = constant_op.constant([5, 0, 1, 2, 6], dtypes.int64) output = table.lookup(remove_keys) self.assertAllEqual([[-1.0], [0.0], [1.0], [2.0], [-1.0]], self.evaluate(output)) del table
def testSinglePartitionedVariable(self): """Ensures partitioned variables fail cleanly with freeze graph.""" checkpoint_prefix = os.path.join(self.get_temp_dir(), "saved_checkpoint") checkpoint_state_name = "checkpoint_state" input_graph_name = "input_graph.pb" output_graph_name = "output_graph.pb" # Create a graph with partition variables. When weights are partitioned into # a single partition, the weights variable is followed by a identity -> # identity (an additional identity node). partitioner = partitioned_variables.fixed_size_partitioner(1) with ops.Graph().as_default(): with variable_scope.variable_scope("part", partitioner=partitioner): batch_size, height, width, depth = 5, 128, 128, 3 input1 = array_ops.zeros((batch_size, height, width, depth), name="input1") input2 = array_ops.zeros((batch_size, height, width, depth), name="input2") num_nodes = depth filter1 = variable_scope.get_variable("filter", [num_nodes, num_nodes]) filter2 = array_ops.reshape(filter1, [1, 1, num_nodes, num_nodes]) conv = nn.conv2d(input=input1, filter=filter2, strides=[1, 1, 1, 1], padding="SAME") node = math_ops.add(conv, input2, name="test/add") node = nn.relu6(node, name="test/relu6") # Save graph and checkpoints. sess = session.Session() sess.run(variables.global_variables_initializer()) saver = saver_lib.Saver() checkpoint_path = saver.save(sess, checkpoint_prefix, global_step=0, latest_filename=checkpoint_state_name) graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name) # Ensure this graph has partition variables. self.assertTrue([ tensor.name.split(":")[0] for op in sess.graph.get_operations() for tensor in op.values() if re.search(r"/part_\d+/", tensor.name) ]) # Test freezing graph doesn't make it crash. output_node_names = "save/restore_all" output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name) return_value = freeze_graph.freeze_graph_with_def_protos( input_graph_def=sess.graph_def, input_saver_def=None, input_checkpoint=checkpoint_path, output_node_names=output_node_names, restore_op_name="save/restore_all", # default value filename_tensor_name="save/Const:0", # default value output_graph=output_graph_path, clear_devices=False, initializer_nodes="") self.assertTrue(return_value, -1)
def doTestBasic(self, use_resource=False, use_callable_params=False): num_updates = 4 # number of ADADELTA steps to perform for dtype in [dtypes.half, dtypes.float32]: for grad in [0.2, 0.1, 0.01]: for lr in [1.0, 0.5, 0.1]: var0_init = [1.0, 2.0] var1_init = [3.0, 4.0] if use_resource: var0 = resource_variable_ops.ResourceVariable( var0_init, dtype=dtype) var1 = resource_variable_ops.ResourceVariable( var1_init, dtype=dtype) else: var0 = variables.Variable(var0_init, dtype=dtype) var1 = variables.Variable(var1_init, dtype=dtype) grads = constant_op.constant([grad, grad], dtype=dtype) accum = 0.0 accum_update = 0.0 # ADADELTA gradient optimizer rho = 0.95 epsilon = 1e-8 if use_callable_params: adadelta_opt = adadelta.Adadelta( learning_rate=lambda: lr, # pylint: disable=cell-var-from-loop rho=lambda: rho, # pylint: disable=cell-var-from-loop epsilon=lambda: epsilon) # pylint: disable=cell-var-from-loop else: adadelta_opt = adadelta.Adadelta(learning_rate=lr, rho=rho, epsilon=epsilon) if not context.executing_eagerly(): adadelta_update = adadelta_opt.apply_gradients( zip([grads, grads], [var0, var1])) self.evaluate(variables.global_variables_initializer()) # Assign slots slot = [None] * 2 slot_update = [None] * 2 slot[0] = adadelta_opt.get_slot(var0, "accum_grad") self.assertEqual(slot[0].get_shape(), var0.get_shape()) slot_update[0] = adadelta_opt.get_slot( var0, "accum_var") self.assertEqual(slot_update[0].get_shape(), var0.get_shape()) slot[1] = adadelta_opt.get_slot(var1, "accum_grad") self.assertEqual(slot[1].get_shape(), var1.get_shape()) slot_update[1] = adadelta_opt.get_slot( var1, "accum_var") self.assertEqual(slot_update[1].get_shape(), var1.get_shape()) # Fetch params to validate initial values self.assertAllClose(var0_init, self.evaluate(var0)) self.assertAllClose(var1_init, self.evaluate(var1)) update = [None] * num_updates tot_update = 0 for step in range(num_updates): # Run adadelta update for comparison if not context.executing_eagerly(): self.evaluate(adadelta_update) else: adadelta_opt.apply_gradients( zip([grads, grads], [var0, var1])) # Perform initial update without previous accum values accum = accum * rho + (grad**2) * (1 - rho) update[step] = (np.sqrt(accum_update + epsilon) * (1. / np.sqrt(accum + epsilon)) * grad) accum_update = (accum_update * rho + (update[step]**2) * (1.0 - rho)) tot_update += update[step] * lr if not context.executing_eagerly(): # Check that the accumulators have been updated # TODO(lxuechen): This is hard to test in eager mode for slot_idx in range(2): self.assertAllCloseAccordingToType( np.array([accum, accum], dtype=dtype.as_numpy_dtype()), self.evaluate(slot[slot_idx]), rtol=1e-5) self.assertAllCloseAccordingToType( np.array([accum_update, accum_update], dtype=dtype.as_numpy_dtype()), self.evaluate(slot_update[slot_idx]), rtol=1e-5) # Check that the parameters have been updated self.assertAllCloseAccordingToType( np.array([ var0_init[0] - tot_update, var0_init[1] - tot_update ], dtype=dtype.as_numpy_dtype()), self.evaluate(var0), rtol=1e-5) self.assertAllCloseAccordingToType( np.array([ var1_init[0] - tot_update, var1_init[1] - tot_update ], dtype=dtype.as_numpy_dtype()), self.evaluate(var1), rtol=1e-5)
def test_save_restore_only_table(self): save_dir = os.path.join(self.get_temp_dir(), "save_restore") save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash") with self.session( config=default_config, graph=ops.Graph(), use_gpu=test_util.is_gpu_available(), ) as sess: v0 = variables.Variable(10.0, name="v0") v1 = variables.Variable(20.0, name="v1") default_val = -1 keys = constant_op.constant([0, 1, 2], dtypes.int64) values = constant_op.constant([[0], [1], [2]], dtypes.int32) table = de.Variable( dtypes.int64, dtypes.int32, name="t1", initializer=default_val, checkpoint=True, ) save = saver.Saver([table]) self.evaluate(variables.global_variables_initializer()) # Check that the parameter nodes have been initialized. self.assertEqual(10.0, self.evaluate(v0)) self.assertEqual(20.0, self.evaluate(v1)) self.assertAllEqual(0, self.evaluate(table.size())) self.evaluate(table.upsert(keys, values)) self.assertAllEqual(3, self.evaluate(table.size())) val = save.save(sess, save_path) self.assertIsInstance(val, six.string_types) self.assertEqual(save_path, val) del table with self.session( config=default_config, graph=ops.Graph(), use_gpu=test_util.is_gpu_available(), ) as sess: default_val = -1 table = de.Variable( dtypes.int64, dtypes.int32, name="t1", initializer=default_val, checkpoint=True, ) self.evaluate( table.upsert( constant_op.constant([0, 2], dtypes.int64), constant_op.constant([[12], [24]], dtypes.int32), )) self.assertAllEqual(2, self.evaluate(table.size())) save = saver.Saver([table._tables[0]]) # Restore the saved values in the parameter nodes. save.restore(sess, save_path) # Check that the parameter nodes have been restored. self.assertAllEqual(3, self.evaluate(table.size())) remove_keys = constant_op.constant([0, 1, 2, 3, 4], dtypes.int64) output = table.lookup(remove_keys) self.assertAllEqual([[0], [1], [2], [-1], [-1]], self.evaluate(output)) del table
def doTestBasic(self, use_resource=False, use_callable_params=False): if context.executing_eagerly() and not use_resource: self.skipTest( "Skipping test with use_resource=False and executing eagerly.") for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): with self.session(graph=ops.Graph()): # Initialize variables for numpy implementation. m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) if use_resource: var0 = resource_variable_ops.ResourceVariable( var0_np, name="var0_%d" % i) var1 = resource_variable_ops.ResourceVariable( var1_np, name="var1_%d" % i) else: var0 = variables.RefVariable(var0_np) var1 = variables.RefVariable(var1_np) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) learning_rate = lambda: 0.001 beta1 = lambda: 0.9 beta2 = lambda: 0.999 epsilon = lambda: 1e-8 if not use_callable_params: learning_rate = learning_rate() beta1 = beta1() beta2 = beta2() epsilon = epsilon() opt = adam.AdamOptimizer(learning_rate=learning_rate) update = opt.apply_gradients( zip([grads0, grads1], [var0, var1])) opt_variables = opt.variables() beta1_power, beta2_power = opt._get_beta_accumulators() self.assertTrue(beta1_power is not None) self.assertTrue(beta2_power is not None) self.assertIn(beta1_power, opt_variables) self.assertIn(beta2_power, opt_variables) # Ensure that non-slot variables are the same type as the requested # variables. self.assertEqual( use_resource, resource_variable_ops.is_resource_variable(beta1_power)) self.assertEqual( use_resource, resource_variable_ops.is_resource_variable(beta2_power)) if not context.executing_eagerly(): with ops.Graph().as_default(): # Shouldn't return non-slot variables from other graphs. self.assertEqual(0, len(opt.variables())) self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) beta1_power, beta2_power = opt._get_beta_accumulators() # Run 3 steps of Adam for t in range(1, 4): if not context.executing_eagerly(): self.evaluate(update) elif t > 1: opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.assertAllCloseAccordingToType( 0.9**(t + 1), self.evaluate(beta1_power)) self.assertAllCloseAccordingToType( 0.999**(t + 1), self.evaluate(beta2_power)) var0_np, m0, v0 = adam_update_numpy( var0_np, grads0_np, t, m0, v0) var1_np, m1, v1 = adam_update_numpy( var1_np, grads1_np, t, m1, v1) # Validate updated params self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) if use_resource: self.assertEqual("var0_%d/Adam:0" % (i, ), opt.get_slot(var=var0, name="m").name)
def _benchmark(self, gradient_type, num_gpus, mode, loss_scaling): """Benchmarks loss scaling. We run a simple model with several scalar variables. The loss is the sum of all variables. The model is simple because we want to measure only the performance of loss scaling, not the performance of the model itself. Args: gradient_type: "optimizer" or "gradient_tape". How gradients are computed. "optimizer" uses Optimizer.minimize. "gradient_tape" uses GradientTape.gradient. num_gpus: The number of GPUs to use. Must be at least 1. mode: "eager", "tf_function", or "graph". "eager" means to use eager mode. "tf_function" means to use eager mode where all computations are wrapped in a tf.function. "graph" means to use TensorFlow 1's graph mode with a tf.compat.v1.Session. "graph" is unsupported with a LossScaleGradientTape. loss_scaling: "fixed", "dynamic", or None. The type of loss scaling to use. None means use no loss scaling, which is useful as a baseline to see how much slower loss scaling is in comparison. """ if mode == 'graph': graph = ops.Graph() ctx_mgr = graph.as_default() elif mode == 'eager': ctx_mgr = context.eager_mode() else: assert mode == 'tf_function' ctx_mgr = context.eager_mode() ls_str = loss_scaling or 'no_loss_scaling' name = '%s_%d_GPU_%s_%s' % (gradient_type, num_gpus, mode, ls_str) with ctx_mgr, _get_strategy(num_gpus).scope() as strategy: opt = adam.Adam() if loss_scaling == 'fixed': loss_scale = loss_scale_module.FixedLossScale(2.) elif loss_scaling == 'dynamic': # Make increment_period so high that it's effectively infinite. This # means the loss scale will never change. Any performance overhead # from increasing/decreasing the loss scale is typically negligible # since it happens infrequently, so we only benchmark the common case # of the loss scale not changing. increment_period = 1000000 loss_scale = loss_scale_module.DynamicLossScale( initial_loss_scale=2., increment_period=increment_period) else: assert loss_scaling is None loss_scale = None num_vars = 200 num_warmup_iters = 1 num_iters = 20 # By using scalar variables, we reduce overhead of the actual GPU work of # multiplying variables, dividing gradients, and checking gradients for # NaNs. Measuring these overheads isn't very useful as there is little we # can do to reduce them (one such way would be to fuse dividing gradients # and checking them for NaNs). We still have all other overheads, such as # all-reducing the `is_finite` values and having a tf.cond or # tf.while_loop based on whether gradients are NaNs. Currently, these # other overheads are much more significant than the GPU work. var_list = [ variables.Variable(i, dtype='float32') for i in range(num_vars) ] def get_loss(): return math_ops.add_n(var_list) if gradient_type == 'gradient_tape': tape_cls = (( lambda: lsgt_module.LossScaleGradientTape(loss_scale)) if loss_scale else backprop.GradientTape) def minimize_fn(): with tape_cls() as tape: loss = get_loss() grads = tape.gradient(loss, var_list) return opt.apply_gradients(zip(grads, var_list)) else: assert gradient_type == 'optimizer' if loss_scale: opt = loss_scale_optimizer.LossScaleOptimizer( opt, loss_scale) def minimize_fn(): return opt.minimize(get_loss, var_list) if mode == 'graph': run_op = strategy.experimental_run_v2(minimize_fn) init_op = variables.global_variables_initializer() with session_module.Session() as sess: sess.run(init_op) self.run_op_benchmark(sess, run_op, min_iters=num_iters, burn_iters=num_warmup_iters, name=name) return def run_fn(): strategy.experimental_run_v2(minimize_fn) if mode == 'tf_function': run_fn = def_function.function(run_fn) for _ in range(num_warmup_iters): run_fn() start = time.time() for _ in range(num_iters): run_fn() end = time.time() self.report_benchmark(iters=num_iters, wall_time=(end - start) / num_iters, name=name)
def _init_and_validate_variable(self, sess, variable_name, variable_value): v = variables.Variable(variable_value, name=variable_name) sess.run(variables.global_variables_initializer()) self.assertEqual(variable_value, v.eval())
def default_init_op(): return control_flow_ops.group( variables.global_variables_initializer(), resources.initialize_resources( resources.shared_resources()))