def testMap_Scoped(self):
  """Checks that `map_fn` creates one scoped variable and can then reuse it."""
  with self.cached_session() as sess:

    def double_scoped(x):
      """2x with a dummy 2 that is scoped."""
      with variable_scope.variable_scope("body"):
        # The multiplier is a variable only so that the test can observe the
        # scope it gets created under.
        two = variable_scope.get_variable(
            "two", [],
            dtype=dtypes.int32,
            initializer=init_ops.constant_initializer(2))
        return math_ops.multiply(x, two)

    values = [1, 2, 3, 4, 5, 6]
    with variable_scope.variable_scope("root") as varscope:
      elems = constant_op.constant(values, name="data")
      expected = np.array([2 * value for value in values])

      mapped = functional_ops.map_fn(double_scoped, elems)
      # Exactly one trainable variable must exist, named after both scopes.
      self.assertEqual(len(variables.trainable_variables()), 1)
      only_variable = variables.trainable_variables()[0]
      self.assertEqual(only_variable.name, "root/body/two:0")
      sess.run([variables.global_variables_initializer()])
      self.assertAllEqual(expected, self.evaluate(mapped))

      # Second pass: with reuse switched on, no additional variable appears.
      varscope.reuse_variables()
      mapped = functional_ops.map_fn(double_scoped, elems)
      self.assertEqual(len(variables.trainable_variables()), 1)
      self.assertAllEqual(expected, self.evaluate(mapped))
def testFunctionalDenseTwiceReuse(self):
  """A second `dense` call with `reuse=True` leaves the variable list as is."""
  inputs = random_ops.random_uniform((5, 3), seed=1)

  core_layers.dense(inputs, 2, name='my_dense')
  after_first_call = variables.trainable_variables()

  core_layers.dense(inputs, 2, name='my_dense', reuse=True)
  after_second_call = variables.trainable_variables()

  self.assertEqual(after_first_call, after_second_call)
def testFunctionalConv2DReuse(self):
  """`conv2d` with `reuse=True` keeps the trainable-variable count at two."""
  height = 7
  width = 9
  image_shape = (5, height, width, 3)
  images = random_ops.random_uniform(image_shape, seed=1)

  conv_layers.conv2d(images, 32, [3, 3], name='conv1')
  count_after_build = len(variables.trainable_variables())
  self.assertEqual(count_after_build, 2)

  conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True)
  count_after_reuse = len(variables.trainable_variables())
  self.assertEqual(count_after_reuse, 2)
def testFunctionalConv3DTransposeNoReuse(self):
  """Two unnamed `conv3d_transpose` calls add two trainable variables each."""
  depth = 5
  height = 7
  width = 9
  volume_shape = (5, depth, height, width, 32)
  volumes = random_ops.random_uniform(volume_shape, seed=1)

  conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3])
  count_after_first = len(variables.trainable_variables())
  self.assertEqual(count_after_first, 2)

  conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3])
  count_after_second = len(variables.trainable_variables())
  self.assertEqual(count_after_second, 4)
def testTensorLearningRateAndMomentum(self):
  """Runs two momentum steps with tensor-valued learning rate and momentum.

  For each tested dtype the optimizer is built with `learning_rate` and
  `momentum` given as constant tensors (2.0 and 0.9).  After each of two
  update steps the "momentum" slots and the variables are compared against
  hand-computed expectations.
  """
  for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    with self.cached_session():
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      mom_opt = momentum_lib.MomentumOptimizer(
          learning_rate=constant_op.constant(2.0),
          momentum=constant_op.constant(0.9))
      mom_update = mom_opt.apply_gradients(
          zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()
      # Check we have slots
      self.assertEqual(["momentum"], mom_opt.get_slot_names())
      slot0 = mom_opt.get_slot(var0, "momentum")
      # `assertEqual` replaces the deprecated `assertEquals` alias, which no
      # longer exists in Python 3.12.
      self.assertEqual(slot0.get_shape(), var0.get_shape())
      self.assertNotIn(slot0, variables.trainable_variables())
      slot1 = mom_opt.get_slot(var1, "momentum")
      self.assertEqual(slot1.get_shape(), var1.get_shape())
      self.assertNotIn(slot1, variables.trainable_variables())

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
      # Step 1: the momentum accumulators were 0. So we should see a normal
      # update: v -= grad * learning_rate
      mom_update.run()
      # Check that the momentum accumulators have been updated.
      self.assertAllCloseAccordingToType(
          np.array([0.1, 0.1]), self.evaluate(slot0))
      self.assertAllCloseAccordingToType(
          np.array([0.01, 0.01]), self.evaluate(slot1))
      # Check that the parameters have been updated.
      self.assertAllCloseAccordingToType(
          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
          self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
          self.evaluate(var1))
      # Step 2: the momentum accumulators contain the previous update.
      mom_update.run()
      # Check that the momentum accumulators have been updated.
      self.assertAllCloseAccordingToType(
          np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
          self.evaluate(slot0))
      self.assertAllCloseAccordingToType(
          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
          self.evaluate(slot1))
      # Check that the parameters have been updated.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
              2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
          ]), self.evaluate(var0))
      self.assertAllCloseAccordingToType(
          np.array([
              2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
              3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
          ]), self.evaluate(var1))
def testFunctionalConv1DNoReuse(self):
  """Two unnamed `separable_conv1d` calls add three variables each."""
  length = 10
  data_shape = (5, length, 3)
  data = random_ops.random_uniform(data_shape, seed=1)

  conv_layers.separable_conv1d(data, 32, 3)
  count_after_first = len(variables.trainable_variables())
  self.assertEqual(count_after_first, 3)

  conv_layers.separable_conv1d(data, 32, 3)
  count_after_second = len(variables.trainable_variables())
  self.assertEqual(count_after_second, 6)
def testFunctionalConv2DTransposeNoReuse(self):
  """Two unnamed `conv2d_transpose` calls add two variables each."""
  height = 7
  width = 9
  image_shape = (5, height, width, 3)
  images = random_ops.random_uniform(image_shape, seed=1)

  conv_layers.conv2d_transpose(images, 32, [3, 3])
  count_after_first = len(variables.trainable_variables())
  self.assertEqual(count_after_first, 2)

  conv_layers.conv2d_transpose(images, 32, [3, 3])
  count_after_second = len(variables.trainable_variables())
  self.assertEqual(count_after_second, 4)
def testFunctionalDenseTwice(self):
  """Two unnamed `dense` calls yield two, then four, trainable variables."""
  inputs = random_ops.random_uniform((5, 3), seed=1)

  core_layers.dense(inputs, 2)
  after_first_call = variables.trainable_variables()

  core_layers.dense(inputs, 2)
  after_second_call = variables.trainable_variables()

  # Both snapshots are taken before asserting, as in a single build pass.
  self.assertEqual(len(after_first_call), 2)
  self.assertEqual(len(after_second_call), 4)
def _CheckDecay(self, ema, actual_decay, dim):
  """Applies `ema` to two variables and their sum, then checks two updates.

  Args:
    ema: The moving-average object under test; its `apply` and `average`
      methods are used.
    actual_decay: The decay value the expected averages are computed with.
    dim: Passed through to `_Repeat` when building the values.
  """
  tens = _Repeat(10.0, dim)
  thirties = _Repeat(30.0, dim)
  var0 = variables.Variable(tens, name="v0")
  var1 = variables.Variable(thirties, name="v1")
  variables.initialize_all_variables().run()
  # tensor2 is the output of the addition, i.e. a plain Tensor rather than a
  # Variable.
  tensor2 = var0 + var1
  update = ema.apply([var0, var1, tensor2])
  avg0 = ema.average(var0)
  avg1 = ema.average(var1)
  avg2 = ema.average(tensor2)

  for average in (avg0, avg1, avg2):
    self.assertFalse(average in variables.trainable_variables())
  variables.initialize_all_variables().run()

  expected_names = (
      ("v0/ExponentialMovingAverage:0", avg0),
      ("v1/ExponentialMovingAverage:0", avg1),
      ("add/ExponentialMovingAverage:0", avg2),
  )
  for expected_name, average in expected_names:
    self.assertEqual(expected_name, average.name)

  # Values before any update has run.
  self.assertAllClose(tens, var0.eval())
  self.assertAllClose(thirties, var1.eval())
  self.assertAllClose(_Repeat(10.0 + 30.0, dim), tensor2.eval())

  # The averages of the variables start at the variables' values.
  self.assertAllClose(tens, avg0.eval())
  self.assertAllClose(thirties, avg1.eval())
  # The average of the plain Tensor is expected to start at zero.
  self.assertAllClose(_Repeat(0.0, dim), avg2.eval())

  # First update.
  update.run()
  dk = actual_decay

  first0 = 10.0 * dk + 10.0 * (1 - dk)
  self.assertAllClose(_Repeat(first0, dim), avg0.eval())
  first1 = 30.0 * dk + 30.0 * (1 - dk)
  self.assertAllClose(_Repeat(first1, dim), avg1.eval())
  first2 = 0.0 * dk + (10.0 + 30.0) * (1 - dk)
  self.assertAllClose(_Repeat(first2, dim), avg2.eval())

  # Second update: every expectation folds the first-step value back in.
  update.run()
  second0 = first0 * dk + 10.0 * (1 - dk)
  self.assertAllClose(_Repeat(second0, dim), avg0.eval())
  second1 = first1 * dk + 30.0 * (1 - dk)
  self.assertAllClose(_Repeat(second1, dim), avg1.eval())
  second2 = first2 * dk + (10.0 + 30.0) * (1 - dk)
  self.assertAllClose(_Repeat(second2, dim), avg2.eval())
def testFunctionalConv2DTransposeReuseFromScope(self):
  """Re-entering 'scope' with `reuse=True` adds no `conv2d_transpose` vars."""
  with variable_scope.variable_scope('scope'):
    height = 7
    width = 9
    images = random_ops.random_uniform((5, height, width, 3), seed=1)
    conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
    count_after_build = len(variables.trainable_variables())
    self.assertEqual(count_after_build, 2)

  with variable_scope.variable_scope('scope', reuse=True):
    conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
    count_after_reuse = len(variables.trainable_variables())
    self.assertEqual(count_after_reuse, 2)
def testFunctionalConv1DReuseFromScope(self):
  """Re-entering 'scope' with `reuse=True` adds no `separable_conv1d` vars."""
  with variable_scope.variable_scope('scope'):
    length = 10
    data = random_ops.random_uniform((5, length, 3), seed=1)
    conv_layers.separable_conv1d(data, 32, 3, name='sepconv1')
    count_after_build = len(variables.trainable_variables())
    self.assertEqual(count_after_build, 3)

  with variable_scope.variable_scope('scope', reuse=True):
    conv_layers.separable_conv1d(data, 32, 3, name='sepconv1')
    count_after_reuse = len(variables.trainable_variables())
    self.assertEqual(count_after_reuse, 3)
def testFunctionalConv3DTransposeReuseFromScope(self):
  """Re-entering 'scope' with `reuse=True` adds no `conv3d_transpose` vars."""
  with variable_scope.variable_scope('scope'):
    depth = 5
    height = 7
    width = 9
    volumes = random_ops.random_uniform(
        (5, depth, height, width, 32), seed=1)
    conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1')
    count_after_build = len(variables.trainable_variables())
    self.assertEqual(count_after_build, 2)

  with variable_scope.variable_scope('scope', reuse=True):
    conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1')
    count_after_reuse = len(variables.trainable_variables())
    self.assertEqual(count_after_reuse, 2)
def testTimeReversedFusedRNN(self):
  """Compares `static_bidirectional_rnn` with fused forward/reversed cells.

  Builds the same bidirectional network twice, once under the "basic" scope
  with `core_rnn.static_bidirectional_rnn` and once under the "fused" scope
  with a `FusedRNNCellAdaptor` plus a `TimeReversedFusedRNN`-wrapped adaptor.
  Both scopes use the same seeded initializer.  Outputs, final states, input
  gradients and weight gradients of the two builds are then compared.
  """
  with self.test_session() as sess:
    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890213)
    fw_cell = core_rnn_cell_impl.BasicRNNCell(10)
    bw_cell = core_rnn_cell_impl.BasicRNNCell(10)
    batch_size = 5
    input_size = 20
    timelen = 15
    # The random input is baked into the graph as a constant, so both builds
    # below see the same values.
    inputs = constant_op.constant(
        np.random.randn(timelen, batch_size, input_size))

    # test bi-directional rnn
    with variable_scope.variable_scope("basic", initializer=initializer):
      # The static RNN is fed the unstacked inputs; its outputs are stacked
      # back into one tensor for the comparison.
      unpacked_inputs = array_ops.unstack(inputs)
      outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
          fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
      packed_outputs = array_ops.stack(outputs)
      # Only the variables whose names start with this scope's prefix.
      basic_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("basic/")
      ]
      sess.run([variables.global_variables_initializer()])
      basic_outputs, basic_fw_state, basic_bw_state = sess.run(
          [packed_outputs, fw_state, bw_state])
      basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
      basic_wgrads = sess.run(
          gradients_impl.gradients(packed_outputs, basic_vars))

    with variable_scope.variable_scope("fused", initializer=initializer):
      fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
          core_rnn_cell_impl.BasicRNNCell(10))
      fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
          fused_rnn_cell.FusedRNNCellAdaptor(
              core_rnn_cell_impl.BasicRNNCell(10)))
      # The fused cells take the whole `inputs` tensor rather than a list.
      fw_outputs, fw_state = fused_cell(
          inputs, dtype=dtypes.float64, scope="fw")
      bw_outputs, bw_state = fused_bw_cell(
          inputs, dtype=dtypes.float64, scope="bw")
      # Forward and backward outputs are joined along axis 2.
      outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
      fused_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("fused/")
      ]
      sess.run([variables.global_variables_initializer()])
      fused_outputs, fused_fw_state, fused_bw_state = sess.run(
          [outputs, fw_state, bw_state])
      fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
      fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

    self.assertAllClose(basic_outputs, fused_outputs)
    self.assertAllClose(basic_fw_state, fused_fw_state)
    self.assertAllClose(basic_bw_state, fused_bw_state)
    self.assertAllClose(basic_grads, fused_grads)
    # Weight gradients are compared with explicit, looser tolerances.
    for basic, fused in zip(basic_wgrads, fused_wgrads):
      self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def _rnn_get_variable(self, getter, *args, **kwargs):
  """Creates a variable via `getter` and records it as (non-)trainable.

  Args:
    getter: Callable invoked with `*args` and `**kwargs` to obtain the
      variable.
    *args: Positional arguments forwarded to `getter`.
    **kwargs: Keyword arguments forwarded to `getter`.

  Returns:
    The variable returned by `getter`.
  """
  variable = getter(*args, **kwargs)

  if variable in tf_variables.trainable_variables():
    trainable = True
  elif isinstance(variable, tf_variables.PartitionedVariable):
    # A partitioned variable is classified by its first partition.
    first_partition = list(variable)[0]
    trainable = first_partition in tf_variables.trainable_variables()
  else:
    trainable = False

  if trainable:
    registry = self._trainable_weights
  else:
    registry = self._non_trainable_weights
  # Record each variable at most once.
  if variable not in registry:
    registry.append(variable)
  return variable
def testFunctionalDenseTwiceReuseFromScope(self):
  """Re-entering 'scope' with `reuse=True` leaves the variable list as is."""
  with self.test_session():
    with variable_scope.variable_scope('scope'):
      inputs = random_ops.random_uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name='my_dense')
      after_build = variables.trainable_variables()

    with variable_scope.variable_scope('scope', reuse=True):
      core_layers.dense(inputs, 2, name='my_dense')
      after_reuse = variables.trainable_variables()

    self.assertEqual(after_build, after_reuse)
def compute_gradients(self,
                      loss,
                      var_list=None,
                      gate_gradients=optimizer.Optimizer.GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False,
                      grad_loss=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  Add rho*elastic_difference to loss to control the exploration
  This is the first part of `minimize()`.  It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable".  Note that "gradient" can be a `Tensor`, an
  `IndexedSlices`, or `None` if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list or tuple of `tf.Variable` to update to minimize
      `loss`.  Defaults to the list of variables collected in the graph
      under the key `GraphKey.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients.  Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

  Returns:
    A list of (gradient, variable) pairs. Variable is always present, but
    gradient can be `None`.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable` objects.
    ValueError: If some arguments are invalid.
  """
  if not var_list:
    var_list = variables.trainable_variables()

  # Pair every optimized variable with its own entry in `self._local_map`.
  # Zipping the full trainable collection against copies looked up from
  # `var_list` pairs unrelated variables whenever the caller passes a subset
  # or a different ordering.
  elastic_difference = [
      math_ops.subtract(var, self._local_map[var]) for var in var_list
  ]

  # The penalty is rho times the summed L2 loss of the differences.
  distance_loss = self._rho * math_ops.add_n(
      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])

  total_loss = loss + distance_loss
  return self._opt.compute_gradients(total_loss, var_list, gate_gradients,
                                     aggregation_method,
                                     colocate_gradients_with_ops, grad_loss)
def _rnn_get_variable(self, getter, *args, **kwargs):
  """Creates a variable via `getter` and records it as (non-)trainable.

  In graph mode trainability is looked up in the trainable-variables
  collection; otherwise the variable's own `_trainable` attribute is used.

  Args:
    getter: Callable invoked with `*args` and `**kwargs` to obtain the
      variable.
    *args: Positional arguments forwarded to `getter`.
    **kwargs: Keyword arguments forwarded to `getter`.

  Returns:
    The variable returned by `getter`.
  """
  variable = getter(*args, **kwargs)

  if not context.in_graph_mode():
    trainable = variable._trainable  # pylint: disable=protected-access
  elif variable in tf_variables.trainable_variables():
    trainable = True
  elif isinstance(variable, tf_variables.PartitionedVariable):
    # A partitioned variable is classified by its first partition.
    first_partition = list(variable)[0]
    trainable = first_partition in tf_variables.trainable_variables()
  else:
    trainable = False

  if trainable:
    registry = self._trainable_weights
  else:
    registry = self._non_trainable_weights
  # Record each variable at most once.
  if variable not in registry:
    registry.append(variable)
  return variable
def testLayerBasic(self):
  """Builds a `CudnnLSTM` layer, reuses it, and checks variables and sums."""
  num_layers = 4
  num_units = 2
  batch_size = 8
  direction = CUDNN_RNN_UNIDIRECTION
  dir_count = 1

  def make_lstm(kernel_init, bias_init):
    """Returns a fresh layer carrying the name shared by both scopes."""
    return cudnn_rnn.CudnnLSTM(
        num_layers,
        num_units,
        direction=direction,
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
        name="awesome_lstm")

  with vs.variable_scope("main"):
    kernel_initializer = init_ops.constant_initializer(0.)
    bias_initializer = init_ops.constant_initializer(0.)
    input_shape = [num_layers * dir_count, batch_size, num_units]
    inputs = random_ops.random_uniform(input_shape, dtype=dtypes.float32)

    lstm = make_lstm(kernel_initializer, bias_initializer)

    # The first call builds the layer, the second call reuses it.
    outputs1, _ = lstm(inputs)
    outputs2, _ = lstm(inputs)

    total_sum1 = math_ops.reduce_sum(outputs1)
    total_sum2 = math_ops.reduce_sum(outputs2)

  with vs.variable_scope("main", reuse=True):
    lstm = make_lstm(kernel_initializer, bias_initializer)

    # A new layer object in a reusing scope.
    outputs3, _ = lstm(inputs)
    total_sum3 = math_ops.reduce_sum(outputs3)

  self.assertEqual(1, len(variables.trainable_variables()))
  saveables = ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)
  self.assertEqual(1, len(saveables))
  self.assertEqual("main/awesome_lstm/opaque_kernel",
                   variables.trainable_variables()[0].op.name)

  with self.test_session(use_gpu=True) as sess:
    sess.run(variables.global_variables_initializer())
    fetched_sums = sess.run([total_sum1, total_sum2, total_sum3])
    (total_sum1_v, total_sum2_v, total_sum3_v) = fetched_sums
    self.assertEqual(0, total_sum1_v)
    self.assertEqual(0, total_sum2_v)
    self.assertEqual(0, total_sum3_v)
def testCollectionsWithScope(self):
  """Checks scope filtering of `global_variables` and `trainable_variables`."""
  with self.cached_session():
    with ops.name_scope("scope_1"):
      var_x = variables.VariableV1(2.0)
    with ops.name_scope("scope_2"):
      var_y = variables.VariableV1(2.0)

    # Both collection getters must return everything when unfiltered and
    # only the matching variable when given a scope.
    for collect in (variables.global_variables,
                    variables.trainable_variables):
      self.assertEqual([var_x, var_y], collect())
      self.assertEqual([var_x], collect("scope_1"))
      self.assertEqual([var_y], collect("scope_2"))
def compute_gradients(self,
                      loss,
                      var_list=None,
                      gate_gradients=GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False,
                      grad_loss=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  This is the first part of `minimize()`.  It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable".  Note that "gradient" can be a `Tensor`, an
  `IndexedSlices`, or `None` if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list of `tf.Variable` to update to minimize
      `loss`.  Defaults to the list of variables collected in the graph
      under the key `GraphKey.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients.  Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

  Returns:
    A list of (gradient, variable) pairs. Variable is always present, but
    gradient can be `None`.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable` objects.
    ValueError: If some arguments are invalid.
  """
  allowed_gates = (Optimizer.GATE_NONE, Optimizer.GATE_OP,
                   Optimizer.GATE_GRAPH)
  if gate_gradients not in allowed_gates:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
                     gate_gradients)

  self._assert_valid_dtypes([loss])
  if grad_loss is not None:
    self._assert_valid_dtypes([grad_loss])

  if var_list is None:
    # Default: the trainable variables plus the trainable resource variables.
    graph_trainables = variables.trainable_variables()
    resource_trainables = ops.get_collection(
        ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
    var_list = graph_trainables + resource_trainables

  processors = [_get_processor(variable) for variable in var_list]
  if not var_list:
    raise ValueError("No variables to optimize.")

  # Gradients are taken with respect to each processor's target.
  var_refs = [processor.target() for processor in processors]
  gate_per_op = gate_gradients == Optimizer.GATE_OP
  grads = gradients.gradients(
      loss,
      var_refs,
      grad_ys=grad_loss,
      gate_gradients=gate_per_op,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)

  grads_and_vars = list(zip(grads, var_list))
  # Only variables that actually received a gradient are dtype-checked.
  variables_with_grads = [
      variable for grad, variable in grads_and_vars if grad is not None
  ]
  self._assert_valid_dtypes(variables_with_grads)
  return grads_and_vars
def test_gradients_are_computed_with_mean_reduction(self):
  """Checks tower losses, devices and names under MEAN loss reduction."""
  with self.test_session() as session:
    tower_specs = replicate_model_fn._get_loss_towers(
        self.model_fn,
        mode=model_fn_lib.ModeKeys.EVAL,
        features=[[0.6], [1.6]],
        labels=[[0.6], [0.6]],
        params=None,
        loss_reduction=losses.Reduction.MEAN,
        config=None,
        devices=['/gpu:0', '/gpu:1'],
        local_ps_devices=['/gpu:0'],
        name_scope_pattern='test_tower_{}')
    session.run(variables.global_variables_initializer())

    self.assertEqual(len(tower_specs), 2)

    first_loss = tower_specs[0].loss
    self.assertEqual('/device:GPU:0', first_loss.device)
    self.assertEqual('averaged_loss:0', first_loss.name)
    self.assertEqual(0.5, session.run(first_loss))

    second_loss = tower_specs[1].loss
    self.assertEqual('/device:GPU:1', second_loss.device)
    self.assertEqual('test_tower_1/averaged_loss:0', second_loss.name)
    # The second tower was fed the feature 1.6 instead of 0.6, hence the
    # larger expected loss.
    self.assertEqual(1.0, session.run(second_loss))

    # Both towers share a single variable.
    self.assertEqual(1, len(variables.global_variables()))
    self.assertEqual(1, len(variables.trainable_variables()))

    with variable_scope.variable_scope('', reuse=True):
      c = variable_scope.get_variable('c', dtype=dtypes.float64)
      self.assertEqual(0.25, session.run(c))
def testWithIsRecomputeKwarg(self):
  """Checks that `recompute_grad` passes `is_recomputing` to the layer."""
  kwarg_values = []

  @rev_block_lib.recompute_grad
  def layer_with_recompute(inputs, is_recomputing=False):
    kwarg_values.append(is_recomputing)
    hidden = core_layers.dense(inputs, 2)
    normalized = normalization_layers.batch_normalization(
        hidden, training=True)
    if is_recomputing:
      # Drop the two most recently added update ops so that the recompute
      # pass does not leave duplicates in the collection.
      pending_updates = ops.get_collection_ref(ops.GraphKeys.UPDATE_OPS)
      for _ in range(2):
        pending_updates.pop()
    return normalized

  x = array_ops.ones((2, 4), dtypes.float32)
  with variable_scope.variable_scope("layer1", use_resource=True):
    y = layer_with_recompute(x)
  loss = math_ops.reduce_sum(y)
  trainables = variables.trainable_variables()
  gradients_impl.gradients(loss, [x] + trainables)

  remaining_updates = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
  self.assertEqual(2, len(remaining_updates))
  # One regular call followed by one recompute call.
  self.assertEqual([False, True], kwarg_values)
def loop_fn(i):
  """Returns the gradients of example `i`'s loss w.r.t. trainable variables."""
  example_image = array_ops.gather(images, i)
  example_label = array_ops.gather(labels, i)
  model_output = model(example_image, training=training)
  flat_logits = array_ops.reshape(model_output, [-1])
  example_loss = losses.softmax_cross_entropy(
      logits=flat_logits,
      onehot_labels=example_label,
      reduction=losses.Reduction.NONE)
  return gradient_ops.gradients(example_loss,
                                variables.trainable_variables())
def testFunctionalDenseInScope(self):
  """Checks `dense` weight names for named, scope-named and unnamed calls."""
  # Explicit layer name inside a scope.
  with variable_scope.variable_scope('test'):
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2, name='my_dense')
    first_weights = variables.trainable_variables()[0]
    self.assertEqual(first_weights.name, 'test/my_dense/weights:0')

  # The scope object itself is passed as the layer name.
  with variable_scope.variable_scope('test1') as scope:
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2, name=scope)
    second_weights = variables.trainable_variables()[2]
    self.assertEqual(second_weights.name, 'test1/weights:0')

  # No layer name given.
  with variable_scope.variable_scope('test2'):
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    third_weights = variables.trainable_variables()[4]
    self.assertEqual(third_weights.name, 'test2/dense/weights:0')
def create_lstm_per_eg_grad(batch_size, state_size, steps):
  """Builds per-example LSTM gradients with both `pfor` and `for_loop`.

  Args:
    batch_size: Number of examples; also the iteration count of both loops.
    state_size: Size passed to `BasicLSTMCell` and used for the input width.
    steps: Number of random input tensors fed to the cell in sequence.

  Returns:
    A `(pfor_outputs, while_outputs)` pair.
  """
  inputs = []
  for _ in range(steps):
    inputs.append(random_ops.random_normal([batch_size, state_size]))
  cell = rnn_cell.BasicLSTMCell(state_size)
  init_state = cell.zero_state(batch_size, dtypes.float32)

  def take_example(tensor, index):
    """Gathers row `index` of `tensor` and restores a leading axis."""
    return array_ops.expand_dims(array_ops.gather(tensor, index), 0)

  def model_fn(inps, init_state):
    state = init_state
    for step_input in inps:
      _, state = cell(step_input, state)
    output = nn.l2_loss(state.c)
    return gradient_ops.gradients(output, variables.trainable_variables())

  def loop_fn(i):
    loop_inputs = [take_example(tensor, i) for tensor in inputs]
    state_parts = [take_example(tensor, i) for tensor in init_state]
    loop_init_state = rnn_cell.LSTMStateTuple(*state_parts)
    return model_fn(loop_inputs, loop_init_state)

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  # `for_loop` is given one output dtype per trainable variable.
  loop_fn_dtypes = [var.dtype for var in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(loop_fn, loop_fn_dtypes,
                                            batch_size)
  return pfor_outputs, while_outputs
def apply(self, var_list=None):
  """Creates moving-average slots for `var_list` and returns the update op.

  Args:
    var_list: Variables or tensors to average.  Defaults to
      `variables.trainable_variables()` when `None`.

  Returns:
    The op produced by `control_flow_ops.group` over all updates.

  Raises:
    TypeError: If an entry's base dtype is neither float32 nor float64.
    ValueError: If an entry already has a moving average.
  """
  # TODO(touts): op_scope
  if var_list is None:
    var_list = variables.trainable_variables()

  for var in var_list:
    if var.dtype.base_dtype not in [dtypes.float32, dtypes.float64]:
      raise TypeError("The variables must be float or double: %s" % var)
    if var in self._averages:
      raise ValueError("Moving average already computed for: %s" % var)

    # Averages of variables are colocated with the variable to lower
    # communication bandwidth across devices.  Other tensors get a zeros
    # slot that is colocated only when their op type is "Variable".
    if not isinstance(var, variables.Variable):
      colocate = var.op.type == "Variable"
      avg = slot_creator.create_zeros_slot(
          var, self._name, colocate_with_primary=colocate)
    else:
      avg = slot_creator.create_slot(
          var, var.initialized_value(), self._name,
          colocate_with_primary=True)
    self._averages[var] = avg

  with ops.name_scope(self._name) as scope:
    decay = self._num_updates / (self._num_updates + 1)
    updates = [self._num_updates_op]
    updates.extend(
        assign_moving_average(self._averages[var], var, decay)
        for var in var_list)
    return control_flow_ops.group(*updates, name=scope)
def _create_multi_lstm_cell_ops(batch_size, num_units, input_depth,
                                num_layers, max_time, compiled):
  """Builds a multi-layer LSTM `dynamic_rnn` and its gradient ops.

  Args:
    batch_size: Batch dimension of the inputs and of the zero state.
    num_units: Number of units of every `LSTMCell`.
    input_depth: Last dimension of the inputs.
    num_layers: Number of stacked cells.
    max_time: Leading (time) dimension of the inputs.
    compiled: If truthy, each cell is wrapped in `rnn_cell.CompiledWrapper`.

  Returns:
    A dict with the keys "outputs", "final_state", "outputs_grad" and
    "final_state_grad".
  """

  def maybe_xla(cell):
    """Wraps `cell` for compilation only when `compiled` is set."""
    if compiled:
      return rnn_cell.CompiledWrapper(cell)
    return cell

  scope_initializer = init_ops.random_uniform_initializer(-0.1, 0.1, seed=2)
  with variable_scope.variable_scope("root", initializer=scope_initializer):
    initial_inputs = random_ops.random_uniform(
        (max_time, batch_size, input_depth), seed=1)
    inputs = variable_scope.get_variable("inputs", initializer=initial_inputs)

    layers = [
        maybe_xla(core_rnn_cell_impl.LSTMCell(num_units))
        for _ in range(num_layers)
    ]
    cell = core_rnn_cell_impl.MultiRNNCell(layers)
    initial_state = cell.zero_state(
        batch_size=batch_size, dtype=dtypes.float32)
    outputs, final_state = rnn.dynamic_rnn(
        cell=cell,
        inputs=inputs,
        initial_state=initial_state,
        time_major=True)
    flat_final_state = nest.flatten(final_state)

    # Both gradients are taken with respect to the same targets: trainable
    # variables, the inputs, and the flattened initial state.
    trainable_variables = variables.trainable_variables()
    wrt_for_outputs = (
        trainable_variables + [inputs] + nest.flatten(initial_state))
    outputs_grad = gradients_impl.gradients([outputs], wrt_for_outputs)
    wrt_for_state = (
        trainable_variables + [inputs] + nest.flatten(initial_state))
    final_state_grad = gradients_impl.gradients(flat_final_state,
                                                wrt_for_state)

    result = {}
    result["outputs"] = outputs
    result["final_state"] = flat_final_state
    result["outputs_grad"] = outputs_grad
    result["final_state_grad"] = final_state_grad
    return result
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Input pipeline: the 'test' flag value selects the eval data.
    use_eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=use_eval_data)

    # Inference graph and top-1 prediction op.
    logits = cifar10.inference(images)
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # The saver restores the moving-average mapping built for the trainable
    # variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore(
        variables.trainable_variables())
    saver = tf.train.Saver(variables_to_restore)

    # Summary op assembled from the TF collection of summaries.
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

    # Evaluate once, then keep re-evaluating after each pause unless a
    # single run was requested.
    eval_once(saver, summary_writer, top_k_op, summary_op)
    while not FLAGS.run_once:
      time.sleep(FLAGS.eval_interval_secs)
      eval_once(saver, summary_writer, top_k_op, summary_op)
def testCustomGrad(self):
  """Checks that `_fn_with_custom_grad` routes gradients through `grad_fn`."""

  def fn(a, b, c):
    projected = core_layers.dense(a, 10, use_bias=False)
    return projected + math_ops.matmul(b, c)

  def grad_fn(inputs, trainable_variables, unused_outputs,
              unused_grad_outputs):
    # Every gradient is a ones tensor scaled by its 1-based position, with
    # the variables numbered after the inputs.
    grad_inputs = [
        array_ops.ones_like(tensor) * float(position)
        for position, tensor in enumerate(inputs, 1)
    ]
    grad_vars = [
        array_ops.ones_like(tensor) * float(position)
        for position, tensor in enumerate(trainable_variables,
                                          len(inputs) + 1)
    ]
    return grad_inputs, grad_vars

  a = random_ops.random_uniform([11, 6])
  b = random_ops.random_uniform([11, 7])
  c = random_ops.random_uniform([7, 10])
  # Stand-in with the dense kernel's shape, used only for expected values.
  w = random_ops.random_uniform([6, 10])

  wrapped_fn = rev_block_lib._fn_with_custom_grad(grad_fn)(fn)
  out = wrapped_fn(a, b, c)
  loss = math_ops.reduce_mean(out)
  grads = gradients_impl.gradients(
      loss, [a, b, c, variables.trainable_variables()[0]])
  expected_grads = [
      array_ops.ones_like(tensor) * float(position)
      for position, tensor in enumerate([a, b, c, w], 1)
  ]

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    g_val, eg_val = sess.run([grads, expected_grads])
    for actual, expected in zip(g_val, eg_val):
      self.assertAllClose(actual, expected)
def testReuse(self):
  """Rebuilding `rev_block` under a reusing scope creates no new variables."""

  def f(x):
    return core_layers.dense(x, self.CHANNELS // 2)

  def g(x):
    return core_layers.dense(x, self.CHANNELS // 2)

  x = random_ops.random_uniform(
      [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
  x1, x2 = array_ops.split(x, 2, axis=-1)

  def build_block(**scope_kwargs):
    """Builds the block under the "test" scope and returns its outputs."""
    with variable_scope.variable_scope("test", **scope_kwargs):
      return rev_block_lib.rev_block(
          x1, x2, f, g, num_layers=self.NUM_LAYERS)

  y1, y2 = build_block()
  num_vars_before = len(variables.global_variables())

  # Rebuild with reuse before any gradient has been taken.
  y1, y2 = build_block(reuse=True)
  num_vars_after = len(variables.global_variables())
  self.assertEqual(num_vars_before, num_vars_after)

  loss = math_ops.reduce_mean(y1 + y2)
  _ = gradients_impl.gradients(loss, [x] + variables.trainable_variables())

  # Rebuild once more, this time after the gradient ops exist.
  y1, y2 = build_block(reuse=True)
  num_vars_after = len(variables.global_variables())
  self.assertEqual(num_vars_before, num_vars_after)
def testFunctionalConv2DInitializerFromScope(self):
  """Kernels pick up the scope's ones initializer; the bias stays zero."""
  with self.test_session() as sess:
    ones_init = init_ops.ones_initializer()
    with variable_scope.variable_scope('scope', initializer=ones_init):
      height = 7
      width = 9
      images = random_ops.random_uniform((5, height, width, 3), seed=1)
      conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1')
      weights = variables.trainable_variables()

      # The three variables must appear in this order.
      expected_fragments = ('depthwise_kernel', 'pointwise_kernel', 'bias')
      for position, fragment in enumerate(expected_fragments):
        self.assertTrue(fragment in weights[position].name)

      sess.run(variables.global_variables_initializer())
      weights = sess.run(weights)
      depthwise_value = weights[0]
      pointwise_value = weights[1]
      bias_value = weights[2]
      # Both kernels were initialized to ones by the scope initializer.
      self.assertAllClose(depthwise_value, np.ones((3, 3, 3, 1)))
      self.assertAllClose(pointwise_value, np.ones((1, 1, 3, 32)))
      # The bias was still initialized to zeros.
      self.assertAllClose(bias_value, np.zeros(32))
def debug_grads(sess, feed_dict):
  """Prints the trainable variable names, then each gradient and value.

  Args:
    sess: Session used to evaluate the (gradient, variable) pairs.
    feed_dict: Feed dict passed to `sess.run`.
  """
  resource_variables = ops.get_collection(
      ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
  var_list = variables.trainable_variables() + resource_variables
  print('variables')
  for variable in var_list:
    print(' ', variable.name)

  # Build and evaluate all (gradient, variable) pairs in one run.
  grads_and_vars = optimizer.compute_gradients(loss)
  evaluated_pairs = sess.run(grads_and_vars, feed_dict=feed_dict)

  for values, tensors in zip(evaluated_pairs, grads_and_vars):
    # Strip the wrapper suffix and the gradients prefix from the name of the
    # gradient tensor before printing it.
    gradient_name = tensors[0].name
    gradient_name = gradient_name.replace('/tuple/control_dependency_1:0', '')
    gradient_name = gradient_name.replace('gradients/', '')
    print('-----------------------------------------')
    print('name', gradient_name)
    print('gradient', values[0])
    print('value', values[1])
def test_run_inception_multiple_outputs(self):
  """Test `run_inception` graph construction with multiple outputs."""
  batch_size = 3
  img = array_ops.ones([batch_size, 299, 299, 3])
  requested_outputs = [
      classifier_metrics.INCEPTION_OUTPUT,
      classifier_metrics.INCEPTION_FINAL_POOL
  ]
  logits, pool = _run_with_mock(
      classifier_metrics.run_inception, img, output_tensor=requested_outputs)

  for output in (logits, pool):
    self.assertTrue(isinstance(output, ops.Tensor))
  logits.shape.assert_is_compatible_with([batch_size, 1001])
  pool.shape.assert_is_compatible_with([batch_size, 2048])

  # The model must not have registered any trainable variables.
  self.assertListEqual([], variables.trainable_variables())
def testReuse(self):
  """Rebuilding `rev_block` under a reusing scope creates no new variables."""

  def f(x):
    return core_layers.dense(x, self.CHANNELS // 2)

  def g(x):
    return core_layers.dense(x, self.CHANNELS // 2)

  x = random_ops.random_uniform(
      [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
  x1, x2 = array_ops.split(x, 2, axis=-1)

  def build_block(**scope_kwargs):
    """Builds the block under the "test" scope and returns its outputs."""
    with variable_scope.variable_scope("test", **scope_kwargs):
      return rev_block_lib.rev_block(
          x1, x2, f, g, num_layers=self.NUM_LAYERS)

  y1, y2 = build_block()
  num_vars_before = len(variables.global_variables())

  # Rebuild with reuse before any gradient has been taken.
  y1, y2 = build_block(reuse=True)
  num_vars_after = len(variables.global_variables())
  self.assertEqual(num_vars_before, num_vars_after)

  loss = math_ops.reduce_mean(y1 + y2)
  _ = gradients_impl.gradients(loss, [x] + variables.trainable_variables())

  # Rebuild once more, this time after the gradient ops exist.
  y1, y2 = build_block(reuse=True)
  num_vars_after = len(variables.global_variables())
  self.assertEqual(num_vars_before, num_vars_after)
def get_init_op(self, task_index):
  """Returns the op that initializes local state before training begins.

  Every trainable (local) variable is assigned the value of its entry in
  `self._global_map`, and every entry of `self._grad_map` is reset to zeros.

  Args:
    task_index: Not used by this implementation.

  Returns:
    A single op grouping all of the assignments.

  Raises:
    ValueError: If there are no trainable variables.
  """
  local_vars = variables.trainable_variables()
  global_center_vars = [self._global_map[var] for var in local_vars]
  grad_vars = [self._grad_map[var] for var in local_vars]
  if not (local_vars and global_center_vars and grad_vars):
    # The trailing space keeps the two adjacent literals from running
    # together as "global_center_variables,grad_center_variables".
    raise ValueError(
        'The lists of local_variables, global_center_variables, '
        'grad_center_variables should not be empty')

  init_ops = []
  for lvar, gc_var in zip(local_vars, global_center_vars):
    init_ops.append(state_ops.assign(lvar, gc_var))
  for g in grad_vars:
    init_ops.append(state_ops.assign(g, array_ops.zeros_like(g)))
  return control_flow_ops.group(*init_ops)
def loss(self, data, labels):
  """Builds the tensor that training minimizes.

  When `self.is_regression` is set, this is the root of the mean squared
  difference between `self.training_inference_graph(data)` and `labels`;
  otherwise it is the mean sparse softmax cross entropy. If
  `self.regularizer` is set, its penalty over all trainable variables is
  added on top.

  Args:
    data: Input forwarded to `self.training_inference_graph`.
    labels: Targets; cast to float32 for regression, int32 otherwise.

  Returns:
    The loss tensor.
  """
  if self.is_regression:
    residual = self.training_inference_graph(data) - math_ops.cast(
        labels, dtypes.float32)
    total = math_ops.sqrt(
        math_ops.reduce_mean(residual * residual), name="loss")
  else:
    class_ids = array_ops.squeeze(math_ops.cast(labels, dtypes.int32))
    logits = self.training_inference_graph(data)
    cross_entropy = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=class_ids, logits=logits)
    total = math_ops.reduce_mean(cross_entropy, name="loss")

  if self.regularizer:
    total += layers.apply_regularization(self.regularizer,
                                         variables.trainable_variables())
  return total
def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP):
  """Compute gradients of "loss" for the variables in "var_list".

  This is the first part of minimize().  It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable".  Note that "gradient" can be a Tensor, an
  IndexedSlices, or None if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list of variables.Variable to update to minimize
      "loss".  Defaults to the list of variables collected in the graph
      under the key GraphKey.TRAINABLE_VARIABLES.
    gate_gradients: How to gate the computation of gradients.  Can be
      GATE_NONE, GATE_OP, or GATE_GRAPH.

  Returns:
    A list of (gradient, variable) pairs.

  Raises:
    TypeError: If var_list contains anything else than variables.Variable.
    ValueError: If some arguments are invalid.
  """
  if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                            Optimizer.GATE_GRAPH]:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                     gate_gradients)
  self._assert_valid_dtypes([loss])
  if var_list is None:
    var_list = variables.trainable_variables()
  for var in var_list:
    if not isinstance(var, variables.Variable):
      raise TypeError("Argument is not a variables.Variable: %s" % var)
  grads = gradients.gradients(
      loss, var_list, gate_gradients=(gate_gradients == Optimizer.GATE_OP))
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)
  # Materialize the pairs: on Python 3 a bare zip() is a one-shot iterator
  # that the dtype check below would exhaust, leaving the caller with
  # nothing to iterate over.
  grads_and_vars = list(zip(grads, var_list))
  self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
  return grads_and_vars
def testLinear(self):
  """Exercises `linear` value computation and variable sharing rules."""
  ones_init = init_ops.constant_initializer(1.0)
  with self.test_session() as sess:
    with variable_scope.variable_scope("root", initializer=ones_init):
      x = array_ops.zeros([1, 2])
      projected = linear([x], 2, False)
      sess.run([variables_lib.global_variables_initializer()])
      fetched = sess.run([projected], {x.name: np.array([[1., 2.]])})
      self.assertAllClose(fetched[0], [[3.0, 3.0]])

      # Building the same layer again in this scope must be rejected, so
      # that a shared function is never created by accident.
      with self.assertRaises(ValueError):
        linear([x], 2, False)

      # A fresh scope allows a new layer, and re-entering that scope with
      # reuse=True shares its variables.
      with variable_scope.variable_scope("l1") as shared_scope:
        first = linear([x], 2, False)
      with variable_scope.variable_scope(shared_scope, reuse=True):
        linear([first], 2, False)
      self.assertEqual(len(variables_lib.trainable_variables()), 2)
def test_run_inception_graph_pool_output(self, use_default_graph_def):
  """Test `run_inception` graph construction with pool output."""
  batch_size = 3
  img = array_ops.ones([batch_size, 299, 299, 3])

  if use_default_graph_def:
    pool = _run_with_mock(
        classifier_metrics.run_inception,
        img,
        output_tensor=classifier_metrics.INCEPTION_FINAL_POOL)
  else:
    pool = classifier_metrics.run_inception(
        img,
        _get_dummy_graphdef(),
        output_tensor=classifier_metrics.INCEPTION_FINAL_POOL)

  # assertIsInstance reports the offending object's type on failure, unlike
  # assertTrue(isinstance(...)) which only says "False is not true".
  self.assertIsInstance(pool, ops.Tensor)
  pool.shape.assert_is_compatible_with([batch_size, 2048])

  # Check that none of the model variables are trainable.
  self.assertListEqual([], variables.trainable_variables())
def testFunctionalConv3DTransposeInitializerFromScope(self):
  """The scope initializer applies to the kernel but not to the bias."""
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        'scope', initializer=init_ops.ones_initializer()):
      depth, height, width = 5, 7, 9
      volumes = random_ops.random_uniform(
          (5, depth, height, width, 32), seed=1)
      conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1')
      weights = variables.trainable_variables()
      # Check the names of weights in order. assertIn prints both operands
      # on failure, which assertTrue(a in b) does not.
      self.assertIn('kernel', weights[0].name)
      self.assertIn('bias', weights[1].name)
      sess.run(variables.global_variables_initializer())
      weights = sess.run(weights)
      # Check that the kernel weights got initialized to ones (from scope)
      self.assertAllClose(weights[0], np.ones((3, 3, 3, 4, 32)))
      # Check that the bias still got initialized to zeros.
      self.assertAllClose(weights[1], np.zeros((4)))
def testDoubleCallInUniqueScope(self):
  """A recompute_grad layer called in two scopes yields usable gradients."""

  @rev_block_lib.recompute_grad
  def layer_with_recompute(inputs):
    with variable_scope.variable_scope("inner", use_resource=True):
      return core_layers.dense(inputs, 2)

  with variable_scope.variable_scope("layer", use_resource=True):
    inputs = array_ops.ones((2, 4), dtypes.float32)

    with variable_scope.variable_scope("layer1", use_resource=True):
      out1 = layer_with_recompute(inputs)
    with variable_scope.variable_scope("layer2", use_resource=True):
      out2 = layer_with_recompute(inputs) + out1
    out = math_ops.reduce_sum(out2)

    tvars = variables.trainable_variables()
    # Use the TestCase assertion rather than a bare `assert`, which is
    # stripped under `python -O` and gives no diagnostic on failure.
    self.assertEqual(4, len(tvars))
    grads = gradients_impl.gradients(out, [inputs] + tvars)
    for grad in grads:
      self.assertIsNotNone(grad)
def variables_to_restore(self, moving_avg_variables=None):
  """Returns a map of names to `Variables` to restore.

  A variable that has a moving average is keyed by its moving-average name;
  any other variable is keyed by its own op name.

  For example,

  ```python
    variables_to_restore = ema.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
  ```

  Below is an example of such a mapping:

  ```
    conv/batchnorm/gamma/ExponentialMovingAverage: conv/batchnorm/gamma,
    conv_4/conv2d_params/ExponentialMovingAverage: conv_4/conv2d_params,
    global_step: global_step
  ```

  Args:
    moving_avg_variables: A list of variables to be restored under their
      moving-average name. If None, defaults to
      variables.trainable_variables() + variables.moving_average_variables().

  Returns:
    A map from restore names to variables. The restore name is the
    moving-average name for the variables listed above, and the original op
    name for every other global variable.
  """
  if moving_avg_variables is None:
    # Trainable variables plus anything explicitly added to the
    # moving_average_variables collection.
    moving_avg_variables = (variables.trainable_variables() +
                            variables.moving_average_variables())

  # De-duplicate before building the map.
  averaged = set(moving_avg_variables)
  name_map = {self.average_name(var): var for var in averaged}

  # Every remaining global variable is restored under its own op name.
  averaged_names = {var.name for var in averaged}
  for var in set(variables.global_variables()):
    if var.name in averaged_names or var.op.name in name_map:
      continue
    name_map[var.op.name] = var
  return name_map
def variables_to_restore(self):
  """Returns a map of names to `Variables` to restore.

  A variable that has a moving average is keyed by its moving-average name;
  any other variable is keyed by its own op name.

  For example,

  ```python
    variables_to_restore = ema.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
  ```

  Below is an example of such a mapping:

  ```
    conv/batchnorm/gamma/ExponentialMovingAverage: conv/batchnorm/gamma,
    conv_4/conv2d_params/ExponentialMovingAverage: conv_4/conv2d_params,
    global_step: global_step
  ```

  Returns:
    A map from restore names to variables. The restore name is the
    moving-average name where one applies, or the original op name.
  """
  # Variables treated as averaged: the explicit collection plus all
  # trainable variables, de-duplicated.
  averaged = list(
      set(variables.moving_average_variables() +
          variables.trainable_variables()))
  name_map = {}
  for var in averaged:
    name_map[self.average_name(var)] = var

  # Everything else is restored under its own op name, unless that key is
  # already taken.
  remaining = set(variables.all_variables()) - set(averaged)
  for var in list(remaining):
    name_map.setdefault(var.op.name, var)
  return name_map
def _model_fn(features, labels, mode, config, params):
  """An Estimator `model_fn` for TPUEstimator.

  `model_fn`, `use_tpu` and `train_batch_size` are resolved from the
  surrounding scope.

  Args:
    features: Input features.
    labels: Input labels.
    mode: One of the `model_fn_lib.ModeKeys` values.
    config: Run configuration; `config.tpu_config.num_shards` is read.
    params: Forwarded to `_ModelFnWrapper`.

  Returns:
    The result of `call_without_tpu` when not training on TPU, otherwise an
    `EstimatorSpec` for the TPU training step.
  """
  wrapper = _ModelFnWrapper(model_fn, config, params, mode, train_batch_size)

  # TODO(jhseu): Move to EVAL and PREDICT to TPU.
  on_tpu_training = use_tpu and mode == model_fn_lib.ModeKeys.TRAIN
  if not on_tpu_training:
    return wrapper.call_without_tpu(features, labels)

  holder = _InputsHolder(
      features=features,
      labels=labels,
      num_shards=config.tpu_config.num_shards)
  dequeue_fn, enqueue_fn = _create_infeed_enqueue_ops_and_dequeue_fn(
      holder, config)
  loss = _train_on_tpu_system(wrapper, dequeue_fn)

  # Gets the variables back from TPU nodes. This means the variables updated
  # by TPU will now be *synced* to host memory.
  update_ops = []
  for var in variables.trainable_variables():
    checked = array_ops.check_numerics(var.read_value(),
                                       'Gradient for %s is NaN' % var.name)
    update_ops.append(checked.op)

  infeed_hook = TPUInfeedSessionHook(config, enqueue_fn)
  logging_hook = training.LoggingTensorHook(
      {
          'loss': array_ops.identity(loss),
          'step': training.get_global_step()
      },
      every_n_secs=30)

  reported_loss = array_ops.identity(loss)
  train_op = control_flow_ops.group(*update_ops)
  return model_fn_lib.EstimatorSpec(
      mode,
      loss=reported_loss,
      training_hooks=[infeed_hook, logging_hook],
      train_op=train_op)
def get_init_op(self, task_index):
  """Returns the op that initializes local state before training begins.

  Every trainable (local) variable and its entry in `self._local_map` are
  assigned the value of the matching entry in `self._global_map`; the
  returned op also passes through a queue-based barrier across workers.

  Args:
    task_index: Index of this worker among `self._num_worker` workers.

  Returns:
    The op grouping the barrier's queue operations, which are gated on the
    assignments.

  Raises:
    ValueError: If there are no trainable variables.
  """

  def _Add_sync_queues_and_barrier(enqueue_after_list):
    """Adds ops to enqueue on all worker queues."""
    queues = []
    for worker in range(self._num_worker):
      queues.append(
          data_flow_ops.FIFOQueue(
              self._num_worker, [dtypes.bool],
              shapes=[[]],
              shared_name='%s%s' % ('variable_init_sync_queue', worker)))

    # For each other worker, add an entry in a queue.
    token = constant_op.constant(False)
    barrier_ops = []
    with ops.control_dependencies(enqueue_after_list):
      for worker, queue in enumerate(queues):
        if i_am(worker):
          barrier_ops.append(control_flow_ops.no_op())
        else:
          barrier_ops.append(queue.enqueue(token))

    # Drain one token per other worker from this worker's own queue.
    barrier_ops.append(queues[task_index].dequeue_many(len(queues) - 1))
    return control_flow_ops.group(*barrier_ops)

  def i_am(worker):
    return worker == task_index

  local_vars = variables.trainable_variables()
  global_center_vars = [self._global_map[var] for var in local_vars]
  local_center_vars = [self._local_map[var] for var in local_vars]
  if not local_vars or not global_center_vars or not local_center_vars:
    raise ValueError(
        'The lists of local_variables, global_center_variables, '
        'local_center_variables should not be empty ')

  assignments = []
  for local, global_center, local_center in zip(
      local_vars, global_center_vars, local_center_vars):
    assignments.append(state_ops.assign(local, global_center))
    assignments.append(state_ops.assign(local_center, global_center))

  init_op = control_flow_ops.group(*assignments)
  return _Add_sync_queues_and_barrier([init_op])
def create_mnist_per_eg_grad(batch_size, data_format, training):
  """Builds per-example gradients of an `Mnist` model with pfor and for_loop.

  Args:
    batch_size: Number of random 28x28 images (and loop iterations).
    data_format: Forwarded to the `Mnist` constructor.
    training: Forwarded to the model call.

  Returns:
    A `(pfor_outputs, while_outputs)` tuple produced from the same loop body.
  """
  images = random_ops.random_uniform([batch_size, 28, 28])

  # Random class ids turned into float32 one-hot rows.
  class_ids = np.random.randint(
      low=0, high=10, size=[batch_size]).astype(np.int32)
  labels = np.zeros((batch_size, 10), dtype=np.float32)
  labels[np.arange(batch_size), class_ids] = 1.

  model = Mnist(data_format)

  def loop_fn(i):
    example = array_ops.gather(images, i)
    target = array_ops.gather(labels, i)
    flat_logits = array_ops.reshape(model(example, training=training), [-1])
    example_loss = losses.softmax_cross_entropy(
        logits=flat_logits,
        onehot_labels=target,
        reduction=losses.Reduction.NONE)
    return gradient_ops.gradients(example_loss,
                                  variables.trainable_variables())

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  # One float32 output per trainable variable for the while-loop version.
  output_dtypes = [dtypes.float32] * len(variables.trainable_variables())
  while_outputs = control_flow_ops.for_loop(loop_fn, output_dtypes,
                                            batch_size)
  return pfor_outputs, while_outputs
def testGetterThatCreatesTwoVariablesAndSumsThem(self):
  """A custom getter may create two variables and return their sum."""

  def custom_getter(getter, name, *args, **kwargs):
    # Create ".../0" and ".../1" and hand back their sum instead.
    first = getter("%s/0" % name, *args, **kwargs)
    second = getter("%s/1" % name, *args, **kwargs)
    with ops.name_scope("custom_getter"):
      return first + second

  with variable_scope.variable_scope("scope", custom_getter=custom_getter):
    v = variable_scope.get_variable("v", [1, 2, 3])

  self.assertEqual([1, 2, 3], v.get_shape())

  # Only the two underlying variables are trainable; `v` is the add op.
  created = variables_lib.trainable_variables()
  self.assertEqual(2, len(created))
  self.assertEqual("scope/v/0:0", created[0].name)
  self.assertEqual("scope/v/1:0", created[1].name)
  self.assertEqual("custom_getter/add:0", v.name)

  with self.test_session() as sess:
    variables_lib.global_variables_initializer().run()
    created_values, summed_value = sess.run([created, v])
    self.assertAllClose(summed_value, sum(created_values))
def create_fc_per_eg_grad(batch_size, activation_size, num_layers):
  """Builds per-example gradients of a dense stack with pfor and for_loop.

  Args:
    batch_size: Number of random input rows (and loop iterations).
    activation_size: Width of the input and of every ReLU layer.
    num_layers: Number of ReLU dense layers before the 1-unit projection.

  Returns:
    A `(pfor_outputs, while_outputs)` tuple produced from the same loop body.
  """
  inp = random_ops.random_normal([batch_size, activation_size])
  hidden_layers = []
  for _ in range(num_layers):
    hidden_layers.append(tf_layers.Dense(activation_size, activation=nn.relu))
  projection = tf_layers.Dense(1)

  def model_fn(activation):
    # Hidden stack, projection, then the l2 loss of the projected output.
    out = activation
    for layer in hidden_layers:
      out = layer(out)
    objective = nn.l2_loss(projection(out))
    return gradient_ops.gradients(objective, variables.trainable_variables())

  def loop_fn(i):
    # Keep a leading batch dimension of one for the selected row.
    row = array_ops.gather(inp, i)
    return model_fn(array_ops.expand_dims(row, 0))

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  loop_fn_dtypes = [var.dtype for var in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(loop_fn, loop_fn_dtypes,
                                            batch_size)
  return pfor_outputs, while_outputs
def _minimize_towers(tower_specs, optimizer):
  """Aggregate and apply gradients for computed losses.

  Args:
    tower_specs: Iterable of specs; each one's `loss` tensor is
      differentiated on that loss's own device.
    optimizer: Optimizer whose `apply_gradients` builds the train op.

  Returns:
    The op returned by `optimizer.apply_gradients`.
  """
  grads_by_var = {}
  for spec in tower_specs:
    with ops_lib.device(spec.loss.device):
      tower_vars = variables_lib.trainable_variables()
      tower_grads = gradients_lib.gradients(spec.loss, tower_vars)

      # Collect, per variable, every tower gradient that exists.
      for var, grad in zip(tower_vars, tower_grads):
        if grad is None:
          continue
        grads_by_var.setdefault(var, []).append(grad)

  with ops_lib.name_scope('gradient_aggregating'):
    aggregated_grads = [
        (_compute_sum_on_device(grads, var.device), var)
        for var, grads in six.iteritems(grads_by_var)
    ]

  return optimizer.apply_gradients(
      aggregated_grads, global_step=training_util.get_global_step())
def _RunRnn(self,
            numpy_inputs,
            numpy_slen,
            cell_name,
            variable_cache,
            is_dynamic,
            time_major=None,
            is_bidirectional=False):
  """Builds an RNN in a fresh graph, runs it once, and returns the results.

  Args:
    numpy_inputs: Array fed to a float32 placeholder of the same shape.
    numpy_slen: Value fed to an int32 placeholder.
    cell_name: Forwarded to `self._CreateCell`.
    variable_cache: Forwarded to `self._MaybeResetVariables`.
    is_dynamic: Selects the `rnn_lib` dynamic builders when true, the
      `functional_rnn` builders otherwise.
    time_major: Forwarded to `self._CreateRnnGraph`.
    is_bidirectional: Selects the bidirectional flavour of the builder.

  Returns:
    A `(graph_def, val)` tuple: the graph definition and the fetched values.
  """
  with ops.Graph().as_default() as graph:
    inputs_ph = array_ops.placeholder(
        dtypes.float32, shape=numpy_inputs.shape)
    slen_ph = array_ops.placeholder(dtypes.int32)
    feeds = {inputs_ph: numpy_inputs, slen_ph: numpy_slen}
    cell = self._CreateCell(cell_name)

    if is_dynamic:
      rnn_fn = (rnn_lib.bidirectional_dynamic_rnn
                if is_bidirectional else rnn_lib.dynamic_rnn)
    else:
      rnn_fn = (functional_rnn.bidirectional_functional_rnn
                if is_bidirectional else functional_rnn.functional_rnn)

    fetches = self._CreateRnnGraph(
        rnn_fn,
        cell,
        inputs_ph,
        slen_ph,
        is_bidirectional,
        time_major=time_major)

    with self.session(graph=graph) as sess:
      sess.run(variables.global_variables_initializer())
      # Note that cell.trainable_variables is not always set.
      self._MaybeResetVariables(variable_cache, sess,
                                variables.trainable_variables())
      val = sess.run(fetches, feed_dict=feeds)
    return graph.as_graph_def(), val
def compute_gradients(self,
                      loss,
                      var_list=None,
                      gate_gradients=GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False):
  """Computes gradients of `loss` for the variables in `var_list`.

  Args:
    loss: Tensor to differentiate.
    var_list: Variables to differentiate with respect to. Defaults to
      `variables.trainable_variables()`.
    gate_gradients: One of `Optimizer.GATE_NONE`, `Optimizer.GATE_OP` or
      `Optimizer.GATE_GRAPH`.
    aggregation_method: Forwarded to `gradients.gradients`.
    colocate_gradients_with_ops: Forwarded to `gradients.gradients`.

  Returns:
    A list of (gradient, variable) pairs.

  Raises:
    TypeError: If `var_list` holds something that is not a
      `variables.Variable`.
    ValueError: If `gate_gradients` is invalid or `var_list` is empty.
  """
  # Argument validation.
  allowed_gates = (Optimizer.GATE_NONE, Optimizer.GATE_OP,
                   Optimizer.GATE_GRAPH)
  if gate_gradients not in allowed_gates:
    raise ValueError(
        "gate_gradients must be one of: Optimizer.GATE_NONE, " +
        "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" % gate_gradients)
  self._assert_valid_dtypes([loss])

  if var_list is None:
    var_list = variables.trainable_variables()
  for candidate in var_list:
    if not isinstance(candidate, variables.Variable):
      raise TypeError("Argument is not a tf.Variable: %s" % candidate)
  if not var_list:
    raise ValueError("No variables to optimize")

  # Differentiate against each variable's ref().
  var_refs = [candidate.ref() for candidate in var_list]
  gate_per_op = gate_gradients == Optimizer.GATE_OP
  grads = gradients.gradients(
      loss,
      var_refs,
      gate_gradients=gate_per_op,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)

  grads_and_vars = list(zip(grads, var_list))
  # Only variables that actually received a gradient are dtype-checked.
  with_gradient = [var for grad, var in grads_and_vars if grad is not None]
  self._assert_valid_dtypes(with_gradient)
  return grads_and_vars
def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0):
  """Builds a gradient-descent train op for a `LogisticClassifier`.

  Args:
    learning_rate: Learning rate for `GradientDescentOptimizer`.
    gradient_multiplier: When not 1.0, every trainable variable is mapped
      to this multiplier; otherwise no multipliers are passed.

  Returns:
    The op returned by `learning.create_train_op`.
  """
  inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
  labels = constant_op.constant(self._labels, dtype=dtypes.float32)

  predictions = LogisticClassifier(inputs)
  loss_ops.log_loss(predictions, labels)
  total_loss = loss_ops.get_total_loss()

  optimizer = gradient_descent.GradientDescentOptimizer(
      learning_rate=learning_rate)

  gradient_multipliers = None
  if gradient_multiplier != 1.0:
    gradient_multipliers = {}
    for var in variables_lib.trainable_variables():
      gradient_multipliers[var] = gradient_multiplier

  return learning.create_train_op(
      total_loss, optimizer, gradient_multipliers=gradient_multipliers)
def __init__(self,
             learning_rate=0.001,
             decay=0.9,
             epsilon=1e-10,
             damping=0.001,
             cov_ema_decay=0.95,
             lrdecay=0.96,
             decay_interval=50,
             layer_collection=None,
             estimation_mode='gradients',
             colocate_gradient_with_ops=True,
             use_locking=False,
             name="kSGLDOpt"):
  """Stores the hyperparameters and creates the Fisher estimator.

  Args:
    learning_rate: Stored as `self._lr`.
    decay: Stored as `self._decay`.
    epsilon: Stored as `self._epsilon`.
    damping: Constant returned by `self.damping_fn`.
    cov_ema_decay: Forwarded to the Fisher estimator.
    lrdecay: Stored as `self._lrdecay`.
    decay_interval: Stored as `self._decay_interval`.
    layer_collection: Forwarded to the Fisher estimator.
    estimation_mode: Forwarded to the Fisher estimator.
    colocate_gradient_with_ops: Forwarded to the Fisher estimator.
    use_locking: Forwarded to the base-class constructor.
    name: Forwarded to the base-class constructor.
  """
  super(kSGLDOpt, self).__init__(use_locking, name)

  # Scalar hyperparameters.
  self._lr = learning_rate
  self._decay = decay
  self._epsilon = epsilon
  self._lrdecay = lrdecay
  self._decay_interval = decay_interval

  # Snapshot of the trainable variables at construction time.
  self._variables = tf_variables.trainable_variables()

  # Settings consumed by the Fisher estimator below.
  self.damping_fn = lambda: damping
  self.cov_ema_decay = cov_ema_decay
  self.layer_collection = layer_collection
  self.estimation_mode = estimation_mode
  self.colocate_gradient_with_ops = colocate_gradient_with_ops

  # Tensor versions of the constructor arguments, created in _prepare().
  self._lr_t = None
  self._decay_t = None
  self._epsilon_t = None

  self._fisher_est = est.FisherEstimator(
      self.damping_fn,
      self._variables,
      self.cov_ema_decay,
      self.layer_collection,
      self.estimation_mode,
      self.colocate_gradient_with_ops)
def testMultivariateNormalDiagNegLogLikelihood(self):
  """Gradient of -sum(log(prob(x))) w.r.t. the mean matches expectation."""
  num_draws = 50
  dims = 3
  with self.cached_session() as sess:
    samples_ph = array_ops.placeholder(
        dtype=dtypes.float32, shape=[None, dims], name="x")
    mean_var = variable_scope.get_variable(
        name="mu",
        shape=[dims],
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(1.))
    sess.run([variables.global_variables_initializer()])

    unit_scale = array_ops.ones(shape=[dims], dtype=dtypes.float32)
    mvn = ds.MultivariateNormalDiag(loc=mean_var, scale_diag=unit_scale)

    # Typically you'd use `mvn.log_prob(x_pl)` which is always at least as
    # numerically stable as `tf.log(mvn.prob(x_pl))`. However in this test
    # we're testing a bug specific to `prob` and not `log_prob`;
    # http://stackoverflow.com/q/45109305. (The underlying issue was not
    # related to `Distributions` but that `reduce_prod` didn't correctly
    # handle negative indexes.)
    log_likelihood = math_ops.reduce_sum(math_ops.log(mvn.prob(samples_ph)))
    neg_log_likelihood = -log_likelihood
    grad_op = gradients_impl.gradients(neg_log_likelihood,
                                       variables.trainable_variables())

    zeros = np.zeros([num_draws, dims], dtype=np.float32)
    grad_values = sess.run(grad_op, feed_dict={samples_ph: zeros})

    # There is exactly one trainable variable, hence one gradient.
    self.assertEqual(1, len(grad_values))
    self.assertAllClose(
        grad_values[0], np.tile(num_draws, dims), rtol=1e-6, atol=0.)
def testStochasticVariables(self):
  """A stochastic-variable getter yields a StochasticTensor with two vars."""
  shape = (10, 20)
  with variable_scope.variable_scope(
      "stochastic_variables",
      custom_getter=sv.make_stochastic_variable_getter(
          dist_cls=dist.NormalWithSoftplusScale)):
    v = variable_scope.get_variable("sv", shape)

  # assertIsInstance reports the actual type on failure.
  self.assertIsInstance(v, st.StochasticTensor)
  self.assertIsInstance(v.distribution, dist.NormalWithSoftplusScale)

  # Use a distinct loop name in a set comprehension: a list comprehension
  # over `v` would rebind the stochastic tensor `v` on Python 2.
  self.assertEqual(
      {"stochastic_variables/sv_loc", "stochastic_variables/sv_scale"},
      {var.op.name for var in variables.global_variables()})
  self.assertEqual(
      set(variables.trainable_variables()), set(variables.global_variables()))

  v = ops.convert_to_tensor(v)
  self.assertEqual(list(shape), v.get_shape().as_list())
  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    self.assertEqual(shape, sess.run(v).shape)
def _generate_shared_variables(self):
  """Creates a non-trainable copy of every trainable variable.

  Each copy is named "<self._name>/<original op name>", starts from the
  original's initialized value, and is created under `self._device_setter`
  together with the global step. The copies always join the "global_model"
  collection; on the chief they also join GLOBAL_VARIABLES.
  """
  # Only the chief should initialize the variables.
  collections = ["global_model"]
  if self._is_chief:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES, "global_model"]

  # Generate new global variables dependent on trainable variables.
  with ops.device(self._device_setter):
    for local_var in variables.trainable_variables():
      shared_name = "%s/%s" % (self._name, local_var.op.name)
      _ = variable_scope.variable(
          name=shared_name,
          initial_value=local_var.initialized_value(),
          trainable=False,
          collections=collections)

    # Place the global step in the ps so that all the workers can see it.
    self._global_step = variables.Variable(
        0, name="%s_global_step" % self._name, trainable=False)
def _model_fn(features, labels, mode):
  """Estimator `model_fn` that runs the training step on TPU shards.

  `model_fn` and `run_config` are resolved from the surrounding scope.

  Args:
    features: Input features.
    labels: Input labels.
    mode: One of the `model_fn_lib.ModeKeys` values.

  Returns:
    The wrapped `model_fn` result outside of TRAIN mode, otherwise an
    `EstimatorSpec` for the TPU training step.
  """
  # TODO(jhseu): Move to EVAL and PREDICT to TPU.
  is_training = mode == model_fn_lib.ModeKeys.TRAIN
  if not is_training:
    return model_fn(features, labels, mode)

  dequeue_fn, enqueue_fn = _create_infeed_enqueue_ops_and_dequeue_fn(
      run_config, features, labels)

  train_step = _convert_model_fn_to_train_step(model_fn, dequeue_fn, mode,
                                               run_config)
  loss = _train_on_tpu_shards(run_config, train_step=train_step)

  # Gets the variables back from TPU nodes. This means the variables updated
  # by TPU will now be *synced* to host memory.
  update_ops = []
  for var in variables.trainable_variables():
    checked = array_ops.check_numerics(var.read_value(),
                                       'Gradient for %s is NaN' % var.name)
    update_ops.append(checked.op)

  infeed_hook = TpuInfeedSessionHook(run_config, enqueue_fn)
  logging_hook = training.LoggingTensorHook(
      {
          'loss': array_ops.identity(loss),
          'step': training.get_global_step()
      },
      every_n_secs=30)

  reported_loss = array_ops.identity(loss)
  train_op = control_flow_ops.group(*update_ops)
  return model_fn_lib.EstimatorSpec(
      mode,
      loss=reported_loss,
      training_hooks=[infeed_hook, logging_hook],
      train_op=train_op)
def compute_gradients_with_injected_short_circuiting(
    loss,
    var_list=None,
    gate_gradients=optimizer.Optimizer.GATE_OP,
    aggregation_method=None,
    colocate_gradients_with_ops=False,
    should_stop_queue=None,
    global_step=None,
    grad_loss=None):
  """Computes gradients via `gradients.gradients_short_circuited`.

  Args:
    loss: Tensor to differentiate.
    var_list: Variables to differentiate with respect to. Defaults to
      `variables.trainable_variables()`.
    gate_gradients: One of the `optimizer.Optimizer` GATE_* constants.
    aggregation_method: Forwarded to the gradient computation.
    colocate_gradients_with_ops: Forwarded to the gradient computation.
    should_stop_queue: Required (asserted not None); forwarded to the
      gradient computation.
    global_step: Required (asserted not None); forwarded to the gradient
      computation.
    grad_loss: Optional; passed as `grad_ys`.

  Returns:
    A list of (gradient, variable) pairs.

  Raises:
    TypeError: If `var_list` holds something that is not a
      `variables.Variable`.
    ValueError: If `gate_gradients` is invalid or `var_list` is empty.
  """
  assert should_stop_queue is not None
  assert global_step is not None

  gate_kinds = optimizer.Optimizer
  allowed_gates = (gate_kinds.GATE_NONE, gate_kinds.GATE_OP,
                   gate_kinds.GATE_GRAPH)
  if gate_gradients not in allowed_gates:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                     gate_gradients)

  assert_valid_dtypes([loss])
  if grad_loss is not None:
    assert_valid_dtypes([grad_loss])

  if var_list is None:
    var_list = variables.trainable_variables()
  for candidate in var_list:
    if not isinstance(candidate, variables.Variable):
      raise TypeError("Argument is not a tf.Variable: %s" % candidate)
  if not var_list:
    raise ValueError("No variables to optimize")

  # Differentiate against each variable's underlying reference.
  var_refs = [candidate._ref() for candidate in var_list]
  gate_per_op = gate_gradients == gate_kinds.GATE_OP
  grads = gradients.gradients_short_circuited(
      loss,
      var_refs,
      grad_ys=grad_loss,
      gate_gradients=gate_per_op,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops,
      should_stop_queue=should_stop_queue,
      global_step=global_step)
  if gate_gradients == gate_kinds.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)

  grads_and_vars = list(zip(grads, var_list))
  # Only variables that actually received a gradient are dtype-checked.
  with_gradient = [var for grad, var in grads_and_vars if grad is not None]
  assert_valid_dtypes(with_gradient)
  return grads_and_vars
def begin(self):
  """Creates the shared variables and caches the ops this hook needs.

  The chief additionally keeps the queue runner and the token
  initialization op; other workers set both to None.
  """
  fed_avg = self._fed_avg_optimizer
  fed_avg._generate_shared_variables()

  local_vars = variables.trainable_variables()
  global_vars = ops.get_collection_ref("global_model")
  self._refresh_local_vars_op = fed_avg._assign_vars(local_vars, global_vars)

  # Pair each local variable with its shared counterpart for averaging.
  paired_vars = list(zip(local_vars, global_vars))
  self._apply_ma_op = fed_avg._apply_model_average(paired_vars, global_vars)

  if not self._is_chief:
    self._local_init_op = fed_avg.local_step_init_op
    self._ready_for_local_init_op = fed_avg.ready_for_local_init_op
    self._q_runner = None
    self._init_tokens_op = None
    return

  self._local_init_op = fed_avg.chief_init_op
  self._ready_for_local_init_op = fed_avg.ready_for_local_init_op
  self._q_runner = fed_avg.get_chief_queue_runner()
  self._init_tokens_op = fed_avg.get_init_tokens_op(self._num_tokens)
def _build_network(self, scope=None):
  """Builds a single dense layer with optional batch-normalized inputs.

  Args:
    scope: Optional variable scope; "ActorNetwork" is the default name.

  Returns:
    A tuple `(inputs, outputs, parameters, normalized_inputs)`, where
    `parameters` are the trainable variables under the network's scope.
  """
  with vs.variable_scope(scope, "ActorNetwork") as net_scope:
    inputs = array_ops.placeholder(
        shape=[None, self._num_units], dtype=dtypes.float32, name="inputs")
    kernel = vs.get_variable(
        name="network_kernel", shape=[self._num_units, self._num_actions])
    bias = vs.get_variable(name="network_bias", shape=[self._num_actions])

    normalized = inputs
    if self._batch_norm:
      # updates_collections=None forces the updates in place, at the cost
      # of a speed penalty.
      normalized = contrib_layers.batch_norm(
          inputs=inputs, is_training=True, updates_collections=None)

    # Named identity, for easier fetching.
    normalized_inputs = array_ops.identity(
        normalized, name="normalized_inputs")

    # One layer; the activation is applied only when one is configured.
    outputs = nn_ops.bias_add(
        math_ops.matmul(normalized_inputs, kernel), bias, name="outputs")
    if self._activation is not None:
      outputs = self._activation(outputs, name="outputs_activated")

  parameters = variables.trainable_variables(net_scope.name)
  return inputs, outputs, parameters, normalized_inputs