def testAddWeight(self):
  layer = base_layers.Layer(name='my_layer')

  # Test basic variable creation.
  variable = layer.add_variable(
      'my_var', [2, 2], initializer=init_ops.zeros_initializer())
  self.assertEqual(variable.name, 'my_layer/my_var:0')
  self.assertListEqual(layer.variables, [variable])
  self.assertListEqual(layer.trainable_variables, [variable])
  self.assertListEqual(layer.non_trainable_variables, [])
  self.assertListEqual(layer.variables,
                       ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

  # Test non-trainable variable creation.
  # layer.add_variable should work even outside `build` and `call`.
  variable_2 = layer.add_variable(
      'non_trainable_var', [2, 2],
      initializer=init_ops.zeros_initializer(),
      trainable=False)
  self.assertListEqual(layer.variables, [variable, variable_2])
  self.assertListEqual(layer.trainable_variables, [variable])
  self.assertListEqual(layer.non_trainable_variables, [variable_2])
  self.assertEqual(
      len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)

  if context.in_graph_mode():
    # Regularizers are only supported in graph mode.
    regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
    variable = layer.add_variable(
        'reg_var', [2, 2],
        initializer=init_ops.zeros_initializer(),
        regularizer=regularizer)
    self.assertEqual(len(layer.losses), 1)
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            collections=None,
                            name=None):
  """Compute the weighted moving average of `value`.

  Conceptually, the weighted moving average is:
    `moving_average(value * weight) / moving_average(weight)`,
  where a moving average updates by the rule
    `new_value = decay * old_value + (1 - decay) * update`
  Internally, this Op keeps moving average variables of both `value * weight`
  and `weight`.

  Args:
    value: A numeric `Tensor`.
    decay: A float `Tensor` or float value. The moving average decay.
    weight: `Tensor` that keeps the current value of a weight. Shape should be
      able to multiply `value`.
    truediv: Boolean, if `True`, dividing by `moving_average(weight)` is
      floating point division. If `False`, use division implied by dtypes.
    collections: List of graph collections keys to add the internal variables
      `value * weight` and `weight` to. Defaults to
      `[GraphKeys.GLOBAL_VARIABLES]`.
    name: Optional name of the returned operation. Defaults to
      "WeightedMovingAvg".

  Returns:
    An Operation that updates and returns the weighted moving average.
  """
  # Unlike assign_moving_average, the weighted moving average doesn't modify
  # user-visible variables. It is the ratio of two internal variables, which
  # are moving averages of the updates. Thus, the signature of this function
  # is quite different than assign_moving_average.
  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  with variable_scope.variable_scope(name, "WeightedMovingAvg",
                                     [value, weight, decay]) as scope:
    value_x_weight_var = variable_scope.get_variable(
        "value_x_weight",
        initializer=init_ops.zeros_initializer(value.get_shape(),
                                               dtype=value.dtype),
        trainable=False,
        collections=collections)
    weight_var = variable_scope.get_variable(
        "weight",
        initializer=init_ops.zeros_initializer(weight.get_shape(),
                                               dtype=weight.dtype),
        trainable=False,
        collections=collections)
    numerator = assign_moving_average(
        value_x_weight_var, value * weight, decay, zero_debias=False)
    denominator = assign_moving_average(
        weight_var, weight, decay, zero_debias=False)

    if truediv:
      return math_ops.truediv(numerator, denominator, name=scope.name)
    else:
      return math_ops.div(numerator, denominator, name=scope.name)
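# A minimal usage sketch for `weighted_moving_average` (not from the original
# source; assumes TF 1.x graph mode and that the function is exposed as
# tensorflow.python.training.moving_averages.weighted_moving_average). It
# tracks a per-example moving loss: the moving average of loss * batch_size
# divided by the moving average of batch_size.
import tensorflow as tf
from tensorflow.python.training import moving_averages

batch_loss = tf.placeholder(tf.float32, shape=[], name="batch_loss")
batch_size = tf.placeholder(tf.float32, shape=[], name="batch_size")

# Each evaluation folds the current (loss, weight) pair into the two internal
# accumulator variables and returns their ratio.
avg_loss = moving_averages.weighted_moving_average(
    batch_loss, decay=0.99, weight=batch_size)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(avg_loss, {batch_loss: 4.0, batch_size: 32.0}))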
def __init__(self,
             axis=-1,
             momentum=0.99,
             epsilon=1e-3,
             center=True,
             scale=True,
             beta_initializer=init_ops.zeros_initializer(),
             gamma_initializer=init_ops.ones_initializer(),
             moving_mean_initializer=init_ops.zeros_initializer(),
             moving_variance_initializer=init_ops.ones_initializer(),
             beta_regularizer=None,
             gamma_regularizer=None,
             trainable=True,
             name=None,
             **kwargs):
  super(BatchNormalization, self).__init__(
      name=name, trainable=trainable, **kwargs)
  self.axis = axis
  self.momentum = momentum
  self.epsilon = epsilon
  self.center = center
  self.scale = scale
  self.beta_initializer = beta_initializer
  self.gamma_initializer = gamma_initializer
  self.moving_mean_initializer = moving_mean_initializer
  self.moving_variance_initializer = moving_variance_initializer
  self.beta_regularizer = beta_regularizer
  self.gamma_regularizer = gamma_regularizer
def testKernelStateList(self):
  """Test that transition kernel works with list input to `state`."""
  num_chains = 2
  loc_one = variable_scope.get_variable(
      "loc_one", [num_chains], initializer=init_ops.zeros_initializer())
  loc_two = variable_scope.get_variable(
      "loc_two", [num_chains], initializer=init_ops.zeros_initializer())

  def target_log_prob_fn(loc_one, loc_two):
    loc = array_ops.stack([loc_one, loc_two])
    log_prob = mvn_tril_lib.MultivariateNormalTriL(
        loc=constant_op.constant([0., 0.]),
        scale_tril=constant_op.constant([[0.1, 0.1],
                                         [0.0, 0.1]])).log_prob(loc)
    return math_ops.reduce_sum(log_prob, 0)

  def proposal_fn(loc_one, loc_two):
    loc_one_proposal = mh.proposal_normal(scale=0.05)
    loc_two_proposal = mh.proposal_normal(scale=0.05)
    loc_one_sample, _ = loc_one_proposal(loc_one)
    loc_two_sample, _ = loc_two_proposal(loc_two)
    return [loc_one_sample, loc_two_sample], None

  new_state, _ = mh.kernel(
      target_log_prob_fn=target_log_prob_fn,
      proposal_fn=proposal_fn,
      current_state=[loc_one, loc_two],
      seed=12415)
  loc_one_update = loc_one.assign(new_state[0])
  loc_two_update = loc_two.assign(new_state[1])

  # Note: `initialize_all_variables` is deprecated; use the current API.
  init = variables.global_variables_initializer()
  with self.test_session() as sess:
    sess.run(init)
    loc_one_samples = []
    loc_two_samples = []
    for _ in range(10000):
      loc_one_sample, loc_two_sample = sess.run(
          [loc_one_update, loc_two_update])
      loc_one_samples.append(loc_one_sample)
      loc_two_samples.append(loc_two_sample)

  loc_one_samples = np.array(loc_one_samples)
  loc_two_samples = np.array(loc_two_samples)
  loc_one_samples = loc_one_samples[1000:]  # drop samples for burn-in
  loc_two_samples = loc_two_samples[1000:]  # drop samples for burn-in

  self.assertAllClose(np.mean(loc_one_samples, 0),
                      np.array([0.] * num_chains),
                      rtol=1e-5, atol=1e-1)
  self.assertAllClose(np.mean(loc_two_samples, 0),
                      np.array([0.] * num_chains),
                      rtol=1e-5, atol=1e-1)
  self.assertAllClose(np.std(loc_one_samples, 0),
                      np.array([0.1] * num_chains),
                      rtol=1e-5, atol=1e-1)
  self.assertAllClose(np.std(loc_two_samples, 0),
                      np.array([0.1] * num_chains),
                      rtol=1e-5, atol=1e-1)
def _auc_hist_accumulate(hist_true, hist_false, nbins, collections):
  """Accumulate histograms in new variables."""
  with variable_scope.variable_scope(
      None, 'hist_accumulate', [hist_true, hist_false]):
    # Holds running total histogram of scores for records labeled True.
    hist_true_acc = variable_scope.get_variable(
        'hist_true_acc',
        initializer=init_ops.zeros_initializer([nbins],
                                               dtype=hist_true.dtype),
        collections=collections,
        trainable=False)
    # Holds running total histogram of scores for records labeled False.
    hist_false_acc = variable_scope.get_variable(
        'hist_false_acc',
        initializer=init_ops.zeros_initializer([nbins],
                                               dtype=hist_false.dtype),
        collections=collections,
        trainable=False)

    update_op = control_flow_ops.group(
        hist_true_acc.assign_add(hist_true),
        hist_false_acc.assign_add(hist_false),
        name='update_op')

    return hist_true_acc, hist_false_acc, update_op
def linear_module(x, output_size):
  w = variable_scope.get_variable(
      "w", shape=[x.get_shape()[1], output_size],
      initializer=init_ops.zeros_initializer())
  b = variable_scope.get_variable(
      "b", shape=[output_size],
      initializer=init_ops.zeros_initializer())
  return (math_ops.matmul(x, w) + b), w
def _templated():
  v = variable_scope.get_variable(
      "v", shape=[1], initializer=init_ops.zeros_initializer(),
      use_resource=True)
  v2 = variable_scope.get_variable(
      "v2", shape=[1], initializer=init_ops.zeros_initializer(),
      use_resource=True)
  return v, v + 1., v2
def __init__(self,
             axis=-1,
             momentum=0.99,
             epsilon=1e-3,
             center=True,
             scale=True,
             beta_initializer=init_ops.zeros_initializer(),
             gamma_initializer=init_ops.ones_initializer(),
             moving_mean_initializer=init_ops.zeros_initializer(),
             moving_variance_initializer=init_ops.ones_initializer(),
             beta_regularizer=None,
             gamma_regularizer=None,
             beta_constraint=None,
             gamma_constraint=None,
             renorm=False,
             renorm_clipping=None,
             renorm_momentum=0.99,
             fused=None,
             trainable=True,
             virtual_batch_size=None,
             adjustment=None,
             name=None,
             **kwargs):
  super(BatchNormalization, self).__init__(
      name=name, trainable=trainable, **kwargs)
  if isinstance(axis, list):
    self.axis = axis[:]
  else:
    self.axis = axis
  self.momentum = momentum
  self.epsilon = epsilon
  self.center = center
  self.scale = scale
  self.beta_initializer = beta_initializer
  self.gamma_initializer = gamma_initializer
  self.moving_mean_initializer = moving_mean_initializer
  self.moving_variance_initializer = moving_variance_initializer
  self.beta_regularizer = beta_regularizer
  self.gamma_regularizer = gamma_regularizer
  self.beta_constraint = beta_constraint
  self.gamma_constraint = gamma_constraint
  self.renorm = renorm
  self.virtual_batch_size = virtual_batch_size
  self.adjustment = adjustment
  if fused is None:
    fused = True
  self.fused = fused
  self._bessels_correction_test_only = True

  if renorm:
    renorm_clipping = renorm_clipping or {}
    keys = ['rmax', 'rmin', 'dmax']
    if set(renorm_clipping) - set(keys):
      raise ValueError('renorm_clipping %s contains keys not in %s' %
                       (renorm_clipping, keys))
    self.renorm_clipping = renorm_clipping
    self.renorm_momentum = renorm_momentum
def testAddWeight(self):
  layer = base_layers.Layer(name='my_layer')

  # Test basic variable creation.
  variable = layer.add_variable(
      'my_var', [2, 2], initializer=init_ops.zeros_initializer())
  self.assertEqual(variable.name, 'my_layer/my_var:0')
  self.assertEqual(layer.variables, [variable])
  self.assertEqual(layer.trainable_variables, [variable])
  self.assertEqual(layer.non_trainable_variables, [])
  if not context.executing_eagerly():
    self.assertEqual(
        layer.variables,
        ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

  # Test non-trainable variable creation.
  # layer.add_variable should work even outside `build` and `call`.
  variable_2 = layer.add_variable(
      'non_trainable_var', [2, 2],
      initializer=init_ops.zeros_initializer(),
      trainable=False)
  self.assertEqual(layer.variables, [variable, variable_2])
  self.assertEqual(layer.trainable_variables, [variable])
  self.assertEqual(layer.non_trainable_variables, [variable_2])

  if not context.executing_eagerly():
    self.assertEqual(
        len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)

  regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
  _ = layer.add_variable(
      'reg_var', [2, 2],
      initializer=init_ops.zeros_initializer(),
      regularizer=regularizer)
  self.assertEqual(len(layer.losses), 1)

  added_variable = [False]

  # Test that sync `ON_READ` variables are defaulted to be non-trainable.
  variable_3 = layer.add_variable(
      'sync_on_read_var', [2, 2],
      initializer=init_ops.zeros_initializer(),
      synchronization=variable_scope.VariableSynchronization.ON_READ,
      aggregation=variable_scope.VariableAggregation.SUM)
  self.assertEqual(layer.non_trainable_variables, [variable_2, variable_3])

  @def_function.function
  def function_adds_weight():
    if not added_variable[0]:
      layer.add_variable(
          'reg_var_from_function', [2, 2],
          initializer=init_ops.zeros_initializer(),
          regularizer=regularizer)
      added_variable[0] = True

  function_adds_weight()
  self.assertEqual(len(layer.losses), 2)
def testLSTMLayer(self):
  # Run with all-0 weights, no padding.
  o = self._RunLSTMLayer('zeros', init_ops.zeros_initializer(), 0., 0., 0.)
  self.assertAllClose(o, [[[0.]] * self._batch_size] * 3)
  o = self._RunLSTMLayer('zeros', init_ops.zeros_initializer(), 0., 1., 0.)
  self.assertAllClose(o, [[[.25]] * self._batch_size,
                          [[.125]] * self._batch_size,
                          [[.0625]] * self._batch_size])
  o = self._RunLSTMLayer('zeros', init_ops.zeros_initializer(), 1., 0., 0.)
  self.assertAllClose(o, [[[0.]] * self._batch_size] * 3)
  o = self._RunLSTMLayer('zeros', init_ops.zeros_initializer(), 1., 1., 0.)
  self.assertAllClose(o, [[[.25]] * self._batch_size,
                          [[.125]] * self._batch_size,
                          [[.0625]] * self._batch_size])

  # Run with all-1 weights, no padding.
  weight1 = 1.
  for m_init in [0., 1.]:
    for c_init in [0., 1.]:
      o = self._RunLSTMLayer('ones', init_ops.ones_initializer(), m_init,
                             c_init, 0.)
      m0 = self._NextM(self._inputs, weight1, m_init, c_init)
      c0 = self._NextC(self._inputs, weight1, m_init, c_init)
      self.assertAllClose(o[0], m0)
      m1 = self._NextM(self._inputs, weight1, m0, c0)
      c1 = self._NextC(self._inputs, weight1, m0, c0)
      self.assertAllClose(o[1], m1)
      m2 = self._NextM(self._inputs, weight1, m1, c1)
      self.assertAllClose(o[2], m2)

  # Run with random weights.
  for weight in np.random.rand(3):
    weight_tf = constant_op.constant(weight, dtypes.float32)
    random_weight = lambda shape, w=weight_tf: array_ops.fill(shape, w)

    # No padding.
    for m_init in [0., 1.]:
      for c_init in [0., 1.]:
        o = self._RunLSTMLayer('random', random_weight, m_init, c_init, 0.)
        m0 = self._NextM(self._inputs, weight, m_init, c_init)
        c0 = self._NextC(self._inputs, weight, m_init, c_init)
        self.assertAllClose(o[0], m0)
        m1 = self._NextM(self._inputs, weight, m0, c0)
        c1 = self._NextC(self._inputs, weight, m0, c0)
        self.assertAllClose(o[1], m1)
        m2 = self._NextM(self._inputs, weight, m1, c1)
        self.assertAllClose(o[2], m2)

    # Set padding.
    o = self._RunLSTMLayer('random', random_weight, 0., 0., 1.)
    self.assertAllClose(o, [[[0.]] * self._batch_size] * 3)
    o = self._RunLSTMLayer('random', random_weight, 0., 1., 1.)
    self.assertAllClose(o, [[[0.]] * self._batch_size] * 3)
    o = self._RunLSTMLayer('random', random_weight, 1., 0., 1.)
    self.assertAllClose(o, [[[1.]] * self._batch_size] * 3)
    o = self._RunLSTMLayer('random', random_weight, 1., 1., 1.)
    self.assertAllClose(o, [[[1.]] * self._batch_size] * 3)
def __init__(self,
             axis=-1,
             momentum=0.99,
             epsilon=1e-3,
             center=True,
             scale=True,
             beta_initializer=init_ops.zeros_initializer(),
             gamma_initializer=init_ops.ones_initializer(),
             moving_mean_initializer=init_ops.zeros_initializer(),
             moving_variance_initializer=init_ops.ones_initializer(),
             beta_regularizer=None,
             gamma_regularizer=None,
             beta_constraint=None,
             gamma_constraint=None,
             renorm=False,
             renorm_clipping=None,
             renorm_momentum=0.99,
             fused=None,
             trainable=True,
             name=None,
             **kwargs):
  super(BatchNormalization, self).__init__(
      name=name, trainable=trainable, **kwargs)
  self.axis = axis
  self.momentum = momentum
  self.epsilon = epsilon
  self.center = center
  self.scale = scale
  self.beta_initializer = beta_initializer
  self.gamma_initializer = gamma_initializer
  self.moving_mean_initializer = moving_mean_initializer
  self.moving_variance_initializer = moving_variance_initializer
  self.beta_regularizer = beta_regularizer
  self.gamma_regularizer = gamma_regularizer
  self.beta_constraint = beta_constraint
  self.gamma_constraint = gamma_constraint
  self.renorm = renorm
  # This environment variable is only used during the testing period of
  # fused batch norm and will be removed after that.
  if fused is None:
    fused = _FUSED_DEFAULT
  self.fused = fused
  self._bessels_correction_test_only = True

  if renorm:
    renorm_clipping = renorm_clipping or {}
    keys = ['rmax', 'rmin', 'dmax']
    if set(renorm_clipping) - set(keys):
      raise ValueError('renorm_clipping %s contains keys not in %s' %
                       (renorm_clipping, keys))
    self.renorm_clipping = renorm_clipping
    self.renorm_momentum = renorm_momentum
def __init__(self,
             axis=-1,
             momentum=0.99,
             epsilon=1e-3,
             center=True,
             scale=True,
             beta_initializer=init_ops.zeros_initializer(),
             gamma_initializer=init_ops.ones_initializer(),
             moving_mean_initializer=init_ops.zeros_initializer(),
             moving_variance_initializer=init_ops.ones_initializer(),
             beta_regularizer=None,
             gamma_regularizer=None,
             renorm=False,
             renorm_clipping=None,
             renorm_momentum=0.99,
             fused=False,
             trainable=True,
             name=None,
             **kwargs):
  super(BatchNormalization, self).__init__(
      name=name, trainable=trainable, **kwargs)
  self.axis = axis
  self.momentum = momentum
  self.epsilon = epsilon
  self.center = center
  self.scale = scale
  self.beta_initializer = beta_initializer
  self.gamma_initializer = gamma_initializer
  self.moving_mean_initializer = moving_mean_initializer
  self.moving_variance_initializer = moving_variance_initializer
  self.beta_regularizer = beta_regularizer
  self.gamma_regularizer = gamma_regularizer
  self.renorm = renorm
  self.fused = fused
  if self.fused and renorm:
    raise ValueError(
        'Batch renorm is currently not supported with fused batch norm.')
  if self.fused and (beta_regularizer is not None or
                     gamma_regularizer is not None):
    raise ValueError('Regularizers are not currently '
                     'supported for fused batch norm.')
  if renorm:
    renorm_clipping = renorm_clipping or {}
    keys = ['rmax', 'rmin', 'dmax']
    if set(renorm_clipping) - set(keys):
      raise ValueError('renorm_clipping %s contains keys not in %s' %
                       (renorm_clipping, keys))
    self.renorm_clipping = renorm_clipping
    self.renorm_momentum = renorm_momentum
def __init__(self, value, decay, truediv=True, collections=None,
             reduction_indices=None, name=None):
  self.value = value
  self.reduction_indices = reduction_indices or [0]
  eps = 1e-8
  if truediv:
    div = math_ops.truediv
  else:
    div = math_ops.div
  if collections is None:
    collections = [ops.GraphKeys.VARIABLES]

  value_shape = value.get_shape().as_list()
  shape = []
  for dim in range(len(value_shape)):
    if dim in self.reduction_indices:
      shape.append(1)
    else:
      shape.append(value_shape[dim])

  with variable_scope.variable_op_scope(
      [value, decay], name, "MomentTracker") as scope:

    mean_x_weight_var = variable_scope.get_variable(
        "mean_x_weight", trainable=False, collections=collections,
        initializer=init_ops.zeros_initializer(shape, dtype=value.dtype))

    variance_x_weight_var = variable_scope.get_variable(
        "variance_x_weight", trainable=False, collections=collections,
        initializer=init_ops.zeros_initializer(shape, dtype=value.dtype))

    weight_var = variable_scope.get_variable(
        "weight", trainable=False, collections=collections,
        initializer=init_ops.zeros_initializer([1], dtype=tf.float32))

    self.tracked_mean = div(mean_x_weight_var, weight_var + eps)
    self.tracked_variance = div(variance_x_weight_var, weight_var + eps)

    self.batch_mean, self.batch_variance = tf.nn.moments(
        self.value, axes=self.reduction_indices,
        shift=self.tracked_mean, keep_dims=True)

    mean_numerator = assign_moving_average(
        mean_x_weight_var, self.batch_mean, decay)
    variance_numerator = assign_moving_average(
        variance_x_weight_var, self.batch_variance, decay)
    denominator = assign_moving_average(weight_var, 1.0, decay)

    self.update_mean = div(mean_numerator, denominator + eps,
                           name=scope.name)
    self.update_variance = div(variance_numerator, denominator + eps,
                               name=scope.name)
def create_variables_and_ops(self, table, variable_name, num_hosts,
                             table_config, table_variables,
                             load_parameters_ops, retrieve_parameters_ops):
  optimizer_name = 'Adam'
  m_initializer = init_ops.zeros_initializer()
  m_variables = _create_partitioned_variables(
      name='%s/%s/m' % (variable_name, optimizer_name),
      num_hosts=num_hosts,
      vocabulary_size=table_config.vocabulary_size,
      embedding_dimension=table_config.dimension,
      collections=[ops.GraphKeys.GLOBAL_VARIABLES],
      initializer=m_initializer)
  v_initializer = init_ops.zeros_initializer()
  v_variables = _create_partitioned_variables(
      name='%s/%s/v' % (variable_name, optimizer_name),
      num_hosts=num_hosts,
      vocabulary_size=table_config.vocabulary_size,
      embedding_dimension=table_config.dimension,
      collections=[ops.GraphKeys.GLOBAL_VARIABLES],
      initializer=v_initializer)
  self._table_to_m_variables_dict[table] = m_variables
  self._table_to_v_variables_dict[table] = v_variables

  for host_id, table_variable, m_variable, v_variable in (zip(
      range(num_hosts), table_variables, m_variables, v_variables)):
    with ops.colocate_with(table_variable):
      load_parameters_op = (
          tpu_ops.load_tpu_embedding_adam_parameters(
              parameters=table_variable,
              momenta=m_variable,
              velocities=v_variable,
              table_name=table,
              num_shards=num_hosts,
              shard_id=host_id))
      retrieved_table, retrieved_m, retrieved_v = (
          tpu_ops.retrieve_tpu_embedding_adam_parameters(
              table_name=table,
              num_shards=num_hosts,
              shard_id=host_id))
      retrieve_parameters_op = control_flow_ops.group(
          state_ops.assign(table_variable, retrieved_table),
          state_ops.assign(m_variable, retrieved_m),
          state_ops.assign(v_variable, retrieved_v))
    load_parameters_ops.append(load_parameters_op)
    retrieve_parameters_ops.append(retrieve_parameters_op)
def _get_or_create_eval_step():
  """Gets or creates the eval step `Tensor`.

  Returns:
    A `Tensor` representing a counter for the evaluation step.

  Raises:
    ValueError: If multiple `Tensors` have been added to the
      `tf.GraphKeys.EVAL_STEP` collection.
  """
  graph = ops.get_default_graph()
  eval_steps = graph.get_collection(ops.GraphKeys.EVAL_STEP)
  if len(eval_steps) == 1:
    return eval_steps[0]
  elif len(eval_steps) > 1:
    raise ValueError('Multiple tensors added to tf.GraphKeys.EVAL_STEP')
  else:
    counter = variable_scope.get_variable(
        'eval_step',
        shape=[],
        dtype=dtypes.int64,
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        collections=[ops.GraphKeys.LOCAL_VARIABLES,
                     ops.GraphKeys.EVAL_STEP])
    return counter
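# The function above is a get-or-create pattern keyed on a graph collection.
# A minimal re-creation with public TF 1.x APIs (not from the original
# source; the collection name 'my_counters' is a hypothetical stand-in for
# ops.GraphKeys.EVAL_STEP):
import tensorflow as tf

def get_or_create_counter(name='counter', collection='my_counters'):
  counters = tf.get_default_graph().get_collection(collection)
  if len(counters) == 1:
    return counters[0]
  elif len(counters) > 1:
    raise ValueError('Multiple tensors added to collection %r' % collection)
  # First call: create the variable and register it in the collection.
  return tf.get_variable(
      name, shape=[], dtype=tf.int64,
      initializer=tf.zeros_initializer(), trainable=False,
      collections=[tf.GraphKeys.LOCAL_VARIABLES, collection])

counter = get_or_create_counter()
assert get_or_create_counter() is counter  # second call reuses the first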
def test_multiple_random_accumulating_updates_results_in_right_dist(self):
  # Accumulate the updates in a new variable. Resultant histogram should be
  # uniform. Use only 3 bins because with many bins it would be unlikely
  # that all would be close to 1/n. If someone ever wants to test that, it
  # would be better to check that the cdf was linear.
  value_range = [1.0, 4.14159]
  with self.test_session() as sess:
    values = array_ops.placeholder(dtypes.float32, shape=[4, 4, 4])
    hist = histogram_ops.histogram_fixed_width(
        values, value_range, nbins=3, dtype=dtypes.int64)

    hist_accum = variables.Variable(init_ops.zeros_initializer()(
        [3], dtype=dtypes.int64))
    hist_accum = hist_accum.assign_add(hist)

    variables.global_variables_initializer().run()

    for _ in range(100):
      # Map the rv: U[0, 1] --> U[value_range[0], value_range[1]].
      values_arr = (
          value_range[0] +
          (value_range[1] - value_range[0]) * self.rng.rand(4, 4, 4))
      hist_accum_arr = sess.run(hist_accum, feed_dict={values: values_arr})

  pmf = hist_accum_arr / float(hist_accum_arr.sum())
  np.testing.assert_allclose(1 / 3, pmf, atol=0.02)
def testInitFromNonInitializer(self):
  with self.test_session() as sess:
    # Test various dtypes with a zeros initializer, as follows:
    types = [
        dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16,
        dtypes.int32, dtypes.int64, dtypes.bool
    ]

    # Use a different variable name for each dtype to keep them distinct.
    for (i, dtype) in enumerate(types):
      x = variable_scope.get_variable(
          name="x%d" % i,
          shape=(3, 4),
          dtype=dtype,
          partitioner=axis0_into2_partitioner)
      y = variable_scope.get_variable(
          name="y%d" % i,
          shape=(6, 4),
          dtype=dtype,
          partitioner=axis0_into2_partitioner,
          initializer=init_ops.zeros_initializer(dtype=dtype))

      variables_lib.global_variables_initializer().run()
      # x and y become lists of variables after partitioning.
      val_x = sess.run(list(x))
      val_y = sess.run(list(y))

      self.assertAllEqual(val_x, val_y)
def _create_global_step(self, graph):
  """Creates a global step suitable for TPUs.

  Args:
    graph: The graph in which to create the global step.

  Returns:
    A global step `Tensor`.

  Raises:
    ValueError: if the global step tensor is already defined.
  """
  graph = graph or ops.get_default_graph()
  if training.get_global_step(graph) is not None:
    raise ValueError('"global_step" already exists.')
  # Create in proper graph and base name_scope.
  with graph.as_default() as g, g.name_scope(None):
    return variable_scope.get_variable(
        ops.GraphKeys.GLOBAL_STEP,
        shape=[],
        dtype=dtypes.int32,
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        use_resource=True,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES,
                     ops.GraphKeys.GLOBAL_STEP])
def model_fn(features, labels, mode):
  _ = labels
  step = training.get_global_step()
  w = variable_scope.get_variable(
      'w',
      shape=[],
      initializer=init_ops.zeros_initializer(),
      dtype=dtypes.int64)

  if estimator_lib.ModeKeys.TRAIN == mode:
    # To consume `features`, add a control dependency on it.
    with ops.control_dependencies([features]):
      step_inc = state_ops.assign_add(training.get_global_step(), 1)
    with ops.control_dependencies([step_inc]):
      assign_w_to_step_plus_2 = w.assign(step + 2)
    return estimator_lib.EstimatorSpec(
        mode,
        loss=constant_op.constant(3.),
        train_op=assign_w_to_step_plus_2)

  if estimator_lib.ModeKeys.EVAL == mode:
    # To consume `features`, add a control dependency on it.
    with ops.control_dependencies([features]):
      loss = constant_op.constant(5.)
    return estimator_lib.EstimatorSpec(
        mode,
        loss=loss,
        # w is constant within each step, so the mean is:
        # w = 0 if step == 0 else step + 2.
        eval_metric_ops={'mean_of_const': metrics_lib.mean(w)})
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  gradients = []
  # Number of stale gradients.
  stale_counter = variable_scope.get_variable(
      "stale_counter", [],
      initializer=init_ops.zeros_initializer(),
      trainable=False)

  def _AcceptGradientOp():
    with ops.control_dependencies(
        [self._opt.apply_gradients(
            grads_and_vars, global_step=global_step, name=name)]):
      return gen_array_ops.identity(0.0)

  def _DropGradientOp():
    return gen_array_ops.identity(1.0)

  for grad_and_var in grads_and_vars:
    grad = grad_and_var[0]
    if isinstance(grad, ops.Tensor):
      gradients.append(grad)
    else:
      gradients.append(grad.op)

  with ops.control_dependencies(gradients), ops.colocate_with(global_step):
    staleness = gen_array_ops.reshape(
        global_step - self._local_step, shape=())

  conditional_update = stale_counter.assign_add(control_flow_ops.cond(
      gen_math_ops.less_equal(staleness, self._staleness),
      _AcceptGradientOp, _DropGradientOp))

  summary.scalar(
      "Gradient staleness percentage",
      stale_counter / (math_ops.cast(global_step + 1, dtypes.float32)))
  return conditional_update
def __init__(self,
             filters,
             kernel_size,
             strides=(1, 1),
             padding='valid',
             data_format='channels_last',
             dilation_rate=(1, 1),
             activation=None,
             use_bias=True,
             kernel_initializer=None,
             bias_initializer=init_ops.zeros_initializer(),
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             trainable=True,
             name=None,
             **kwargs):
  super(MaskedConv2D, self).__init__(
      rank=2,
      filters=filters,
      kernel_size=kernel_size,
      strides=strides,
      padding=padding,
      data_format=data_format,
      dilation_rate=dilation_rate,
      activation=activation,
      use_bias=use_bias,
      kernel_initializer=kernel_initializer,
      bias_initializer=bias_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      activity_regularizer=activity_regularizer,
      trainable=trainable,
      name=name,
      **kwargs)
def testInitialValueComesFromCheckpoint(self):
  checkpoint_dir = self.get_temp_dir()
  with self.test_session() as session:
    v1, _, _, _ = _create_checkpoints(session, checkpoint_dir)

  # New graph and session.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as session:
      with variable_scope.variable_scope(
          "some_scope", initializer=init_ops.zeros_initializer()):
        my1 = variable_scope.get_variable("my1", [1, 10])

      # At this point, my1.initialized_value() will add ops that reference
      # the zeros initializer of my1.
      before = variables.Variable(my1.initialized_value(), name="before")

      checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1})

      # At this point, my1.initialized_value() will add ops that reference
      # the newly set initializer of my1.
      after = variables.Variable(my1.initialized_value(), name="after")

      session.run(variables.global_variables_initializer())
      self.assertAllEqual(session.run(my1), v1)
      self.assertAllEqual(session.run(my1.initialized_value()), v1)
      self.assertAllClose(session.run(before), [[0.0] * 10])
      self.assertAllClose(session.run(after), v1)
      with self.assertRaises(AssertionError):
        self.assertAllClose(session.run(before), session.run(after))
def variable_scoped_function_no_return_value(trainable=True):
  # defun cannot compile functions that return non-Tensor objects.
  _ = variable_scope.get_variable(
      "dummy", shape=[1], trainable=trainable,
      initializer=init_ops.zeros_initializer())
def testZerosInitializer(self):
  with self.test_session(use_gpu=True):
    shape = [2, 3]
    x = variable_scope.get_variable(
        "x", shape=shape, initializer=init_ops.zeros_initializer())
    x.initializer.run()
    self.assertAllEqual(x.eval(), np.zeros(shape))
def testVariableInput(self):
  with self.test_session():
    v = variable_scope.get_variable(
        'X', initializer=init_ops.zeros_initializer(), shape=(1, 1))
    x = core_layers.Dense(1)(v)
    variables.global_variables_initializer().run()
    self.assertAllEqual(x.eval(), [[0.0]])
def testInitialValueComesFromCheckpoint(self):
  checkpoint_dir = self.get_temp_dir()
  with self.test_session() as session:
    v1, _, _, _ = _create_checkpoints(session, checkpoint_dir)

  # New graph and session.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as session:
      with variable_scope.variable_scope(
          "some_scope", initializer=init_ops.zeros_initializer()):
        my1 = variable_scope.get_variable("my1", [1, 10])

      before = my1.initialized_value()
      checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1})
      after = my1.initialized_value()

      self.assertAllEqual(session.run(before), [[0.0] * 10])
      self.assertAllEqual(session.run(after), v1)

      session.run(variables.global_variables_initializer())
      self.assertAllEqual(session.run(my1), v1)
      self.assertAllEqual(session.run(my1.initialized_value()), v1)
      self.assertAllClose(session.run(before), v1)
      self.assertAllClose(session.run(after), v1)
      with self.assertRaises(AssertionError):
        self.assertAllClose(v1, [[0.0] * 10])
def build(self, inputs_shape):
  if inputs_shape[1].value is None:
    raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                     % inputs_shape)

  input_depth = inputs_shape[1].value
  self._gate_kernel = self.add_variable(
      "gates/%s" % _WEIGHTS_VARIABLE_NAME,
      shape=[input_depth + self._num_units, 2 * self._num_units],
      initializer=self._kernel_initializer)
  self._gate_bias = self.add_variable(
      "gates/%s" % _BIAS_VARIABLE_NAME,
      shape=[2 * self._num_units],
      initializer=(
          self._bias_initializer
          if self._bias_initializer is not None
          else init_ops.constant_initializer(1.0, dtype=self.dtype)))
  self._candidate_kernel = self.add_variable(
      "candidate/%s" % _WEIGHTS_VARIABLE_NAME,
      shape=[input_depth + self._num_units, self._num_units],
      initializer=self._kernel_initializer)
  self._candidate_bias = self.add_variable(
      "candidate/%s" % _BIAS_VARIABLE_NAME,
      shape=[self._num_units],
      initializer=(
          self._bias_initializer
          if self._bias_initializer is not None
          else init_ops.zeros_initializer(dtype=self.dtype)))

  self.built = True
def __init__(self,
             units,
             activation=None,
             use_bias=True,
             kernel_initializer=None,
             bias_initializer=init_ops.zeros_initializer(),
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             trainable=True,
             name=None,
             **kwargs):
  super(Dense, self).__init__(
      units=units,
      activation=activation,
      use_bias=use_bias,
      kernel_initializer=kernel_initializer,
      bias_initializer=bias_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      activity_regularizer=activity_regularizer,
      kernel_constraint=kernel_constraint,
      bias_constraint=bias_constraint,
      trainable=trainable,
      name=name,
      **kwargs)
def __init__(self,
             units,
             activation=None,
             use_bias=True,
             kernel_initializer=None,
             bias_initializer=init_ops.zeros_initializer(),
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             trainable=True,
             name=None,
             **kwargs):
  super(Dense, self).__init__(trainable=trainable, name=name, **kwargs)
  self.units = units
  self.activation = activation
  self.use_bias = use_bias
  self.kernel_initializer = kernel_initializer
  self.bias_initializer = bias_initializer
  self.kernel_regularizer = kernel_regularizer
  self.bias_regularizer = bias_regularizer
  self.activity_regularizer = activity_regularizer
  self.kernel_constraint = kernel_constraint
  self.bias_constraint = bias_constraint
  self.input_spec = base.InputSpec(min_ndim=2)
def create_global_step(graph=None):
  """Create global step tensor in graph.

  Args:
    graph: The graph in which to create the global step. If missing, use
      default graph.

  Returns:
    Global step tensor.

  Raises:
    ValueError: if global step key is already defined.
  """
  graph = ops.get_default_graph() if graph is None else graph
  if get_global_step(graph) is not None:
    raise ValueError('"global_step" already exists.')
  # Create in proper graph and base name_scope.
  with graph.as_default() as g, g.name_scope(None):
    collections = [ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.GLOBAL_STEP]
    return variable(
        ops.GraphKeys.GLOBAL_STEP,
        shape=[],
        dtype=dtypes.int64,
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        collections=collections)
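# A short usage sketch (not from the original source): in TF 1.x essentially
# the same helper is available publicly as tf.train.create_global_step, and
# optimizers or explicit assign ops advance the resulting counter.
import tensorflow as tf

with tf.Graph().as_default():
  global_step = tf.train.create_global_step()
  increment = tf.assign_add(global_step, 1)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
      print(sess.run(increment))  # prints 1, 2, 3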
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
  """A restricted linear prediction builder based on FeatureColumns.

  As long as all feature columns are unweighted sparse columns, this computes
  the prediction of a linear model which stores all weights in a single
  variable.

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string'
      key means a base feature (not-transformed). It can have FeatureColumn
      as a key too. That means that FeatureColumn is already transformed by
      the input pipeline. For example, `inflow` may have handled
      transformations.
    feature_columns: A set containing all the feature columns. All items in
      the set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

    * A Tensor which represents predictions of a linear model.
    * A list of Variables storing the weights.
    * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='joint_weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    transformer = _Transformer(columns_to_tensors)
    embedding_lookup_arguments = []
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      try:
        embedding_lookup_arguments.append(
            column._wide_embedding_lookup_arguments(  # pylint: disable=protected-access
                transformed_tensor))
      except NotImplementedError:
        raise NotImplementedError('Real-valued columns are not supported. '
                                  'Use weighted_sum_from_feature_columns '
                                  'instead, or bucketize these columns.')

    variable, predictions_no_bias = _create_joint_embedding_lookup(
        columns_to_tensors,
        embedding_lookup_arguments,
        num_outputs,
        trainable,
        weight_collections)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=_add_variable_collection(weight_collections))
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, variable, bias
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d
  layers. To use in classification mode, resize input to 224x224. To use in
  fully convolutional mode, set spatial_squeeze to false. The LRN layers
  have been removed, and the initializers have been changed from
  random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=[end_points_collection]):
      net = layers.conv2d(
          inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
      net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
      net = layers.conv2d(net, 192, [5, 5], scope='conv2')
      net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = layers.conv2d(net, 384, [3, 3], scope='conv3')
      net = layers.conv2d(net, 384, [3, 3], scope='conv4')
      net = layers.conv2d(net, 256, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

      # Use conv2d instead of fully_connected layers.
      with arg_scope(
          [layers.conv2d],
          weights_initializer=trunc_normal(0.005),
          biases_initializer=init_ops.constant_initializer(0.1)):
        net = layers.conv2d(net, 4096, [5, 5], padding='VALID', scope='fc6')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training,
            scope='dropout6')
        net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training,
            scope='dropout7')
        net = layers.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            biases_initializer=init_ops.zeros_initializer(),
            scope='fc8')

      # Convert end_points_collection into an end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
def build(self, input_shape):
  self.my_var = self.add_variable(
      'my_var', [2, 2], initializer=init_ops.zeros_initializer())
def moving_mean_variance(value, decay, collections=None, name=None):
  """Compute exponentially weighted moving {mean,variance} of a streaming value.

  The exponentially weighted moving `mean_var` and `variance_var` are updated
  by `value` according to the following recurrence:

  ```python
  variance_var = decay * (variance_var + (1 - decay) * (value - mean_var)**2)
  mean_var = decay * mean_var + (1 - decay) * value
  ```

  Note: `mean_var` is updated *after* `variance_var`, i.e., `variance_var`
  uses the lag-`1` mean.

  For derivation justification, see equation 143 of:
    T. Finch, Feb 2009. "Incremental calculation of weighted mean and
    variance". http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf

  Unlike `assign_moving_mean_variance`, this function handles variable
  creation.

  Args:
    value: `float`-like `Tensor`. Same shape as `mean_var` and
      `variance_var`.
    decay: A `float`-like `Tensor`. The moving mean decay. Typically close to
      `1.`, e.g., `0.999`.
    collections: Python list of graph-collections keys to which the internal
      variables `mean_var` and `variance_var` are added. Default value is
      `[GraphKeys.GLOBAL_VARIABLES]`.
    name: Optional name of the returned operation.

  Returns:
    mean_var: `Variable` representing the `value`-updated exponentially
      weighted moving mean.
    variance_var: `Variable` representing the `value`-updated exponentially
      weighted moving variance.

  Raises:
    TypeError: if `value` does not have float type `dtype`.
    TypeError: if `value` and `decay` have different `base_dtype`.
  """
  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  with variable_scope.variable_scope(
      name, "moving_mean_variance", [value, decay]):
    value = ops.convert_to_tensor(value, name="value")
    base_dtype = value.dtype.base_dtype
    if not base_dtype.is_floating:
      raise TypeError(
          "value.base_dtype({}) does not have float type `dtype`.".format(
              base_dtype.name))
    decay = ops.convert_to_tensor(decay, dtype=base_dtype, name="decay")
    variance_var = variable_scope.get_variable(
        "moving_variance",
        shape=value.shape,
        dtype=value.dtype,
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        collections=collections)
    mean_var = variable_scope.get_variable(
        "moving_mean",
        shape=value.shape,
        dtype=value.dtype,
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        collections=collections)
    return assign_moving_mean_variance(mean_var, variance_var, value, decay)
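# A minimal usage sketch for `moving_mean_variance` (not from the original
# source; the import path is an assumption -- in contrib-era TensorFlow the
# function lived in tensorflow.contrib.distributions.python.ops.moving_stats).
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import moving_stats

value = tf.placeholder(tf.float32, shape=[], name="value")
# The first call creates the `moving_mean` and `moving_variance` variables.
mean_var, variance_var = moving_stats.moving_mean_variance(value, decay=0.99)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Each run applies the recurrence from the docstring once.
  for v in [1.0, 2.0, 3.0]:
    mean, variance = sess.run([mean_var, variance_var], {value: v})
  print(mean, variance)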
def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=initializers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=init_ops.zeros_initializer(),
           biases_regularizer=None,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None,
           quantizer=None,
           weight_quantizer=None):
  """conv2d function call adapted from the slim library, with quantizers."""
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC',
                         'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  layer_variable_getter = layers._build_variable_getter(
      {'bias': 'biases', 'kernel': 'weights'})

  with variable_scope.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if input_rank == 4:
      layer_class = QConv2D  # convolutional.Conv2D
    else:
      raise ValueError('Convolution not supported for input with rank',
                       input_rank)

    df = ('channels_first'
          if data_format and data_format.startswith('NC')
          else 'channels_last')
    layer = layer_class(filters=num_outputs,
                        kernel_size=kernel_size,
                        strides=stride,
                        padding=padding,
                        data_format=df,
                        dilation_rate=rate,
                        activation=None,
                        use_bias=not normalizer_fn and biases_initializer,
                        kernel_initializer=weights_initializer,
                        bias_initializer=biases_initializer,
                        kernel_regularizer=weights_regularizer,
                        bias_regularizer=biases_regularizer,
                        activity_regularizer=None,
                        trainable=trainable,
                        name=sc.name,
                        dtype=inputs.dtype.base_dtype,
                        _scope=sc,
                        _reuse=reuse,
                        quantizer=quantizer,
                        weight_quantizer=weight_quantizer)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    layers._add_variable_to_collections(layer.kernel,
                                        variables_collections, 'weights')
    if layer.use_bias:
      layers._add_variable_to_collections(layer.bias,
                                          variables_collections, 'biases')

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
      if quantizer is not None:  # quantize after normalization
        outputs = quantizer.quantize(outputs)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
      if quantizer is not None:  # quantize after activation
        outputs = quantizer.quantize(outputs)

    return slim_utils.collect_named_outputs(outputs_collections,
                                            sc.original_name_scope, outputs)
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
  """A tf.contrib.layers-style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature
  columns. This function generates the weighted sum for each of the
  num_outputs. Weighted sum refers to logits in classification problems and
  to the prediction itself in linear regression problems.

  Example:

  ```
  # Building model for training
  feature_columns = (
      real_valued_column("my_feature1"),
      ...
  )
  columns_to_tensor = tf.parse_example(...)
  logits = weighted_sum_from_feature_columns(
      columns_to_tensors=columns_to_tensor,
      feature_columns=feature_columns,
      num_outputs=1)
  loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                 logits=logits)
  ```

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string'
      key means a base feature (not-transformed). It can have FeatureColumn
      as a key too. That means that FeatureColumn is already transformed by
      the input pipeline. For example, `inflow` may have handled
      transformations.
    feature_columns: A set containing all the feature columns. All items in
      the set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

    * A Tensor which represents predictions of a linear model.
    * A dictionary which maps feature_column to corresponding Variable.
    * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    output_tensors = []
    column_to_variable = dict()
    transformer = _Transformer(columns_to_tensors)
    # pylint: disable=protected-access
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      try:
        embedding_lookup_arguments = column._wide_embedding_lookup_arguments(
            transformed_tensor)
        variable, predictions = _create_embedding_lookup(
            column,
            columns_to_tensors,
            embedding_lookup_arguments,
            num_outputs,
            trainable,
            weight_collections)
      except NotImplementedError:
        with variable_scope.variable_scope(
            None,
            default_name=column.name,
            values=columns_to_tensors.values()):
          tensor = column._to_dense_tensor(transformed_tensor)
          tensor = fc._reshape_real_valued_tensor(tensor, 2, column.name)
          variable = [
              contrib_variables.model_variable(
                  name='weight',
                  shape=[tensor.get_shape()[1], num_outputs],
                  initializer=init_ops.zeros_initializer(),
                  trainable=trainable,
                  collections=weight_collections)
          ]
          predictions = math_ops.matmul(tensor, variable[0], name='matmul')
      except ValueError as ee:
        raise ValueError('Error creating weighted sum for column: {}.\n'
                         '{}'.format(column.name, ee))
      output_tensors.append(
          array_ops.reshape(predictions, shape=(-1, num_outputs)))
      column_to_variable[column] = variable
      _log_variable(variable)
      _maybe_restore_from_checkpoint(column._checkpoint_path(), variable)
    # pylint: enable=protected-access

    predictions_no_bias = math_ops.add_n(output_tensors)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=_add_variable_collection(weight_collections))
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias