def linear_logit_fn(features):
  """Linear model logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.

  Returns:
    A `Tensor` representing the logits.
  """
  cols_to_vars = {}
  logits = feature_column_lib.linear_model(
      features=features,
      feature_columns=feature_columns,
      units=units,
      sparse_combiner=sparse_combiner,
      cols_to_vars=cols_to_vars)
  bias = cols_to_vars.pop('bias')
  if units > 1:
    summary.histogram('bias', bias)
  else:
    # If units == 1, the bias value is a length-1 list of a scalar Tensor,
    # so we should provide a scalar summary.
    summary.scalar('bias', bias[0][0])
  summary.scalar('fraction_of_zero_weights',
                 _compute_fraction_of_zero(cols_to_vars))
  return logits
def set_model(self, model):
  self.model = model
  self.sess = K.get_session()
  if self.histogram_freq and self.merged is None:
    for layer in self.model.layers:
      for weight in layer.weights:
        tf_summary.histogram(weight.name, weight)
        if self.write_images:
          w_img = array_ops.squeeze(weight)
          shape = w_img.get_shape()
          if len(shape) > 1 and shape[0] > shape[1]:
            w_img = array_ops.transpose(w_img)
          if len(shape) == 1:
            w_img = array_ops.expand_dims(w_img, 0)
          w_img = array_ops.expand_dims(array_ops.expand_dims(w_img, 0), -1)
          tf_summary.image(weight.name, w_img)
      if hasattr(layer, 'output'):
        tf_summary.histogram('{}_out'.format(layer.name), layer.output)
  self.merged = tf_summary.merge_all()
  if self.write_graph:
    self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph)
  else:
    self.writer = tf_summary.FileWriter(self.log_dir)
def add_gradients_summaries(grads_and_vars):
  """Add summaries to gradients.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The list of created summaries.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, ops.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(
          summary.histogram(var.op.name + '/gradient', grad_values))
      summaries.append(
          summary.histogram(var.op.name + '/gradient_norm',
                            clip_ops.global_norm([grad_values])))
    else:
      logging.info('Var %s has no gradient', var.op.name)
  return summaries
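# Illustrative sketch (not part of the snippet above): one plausible way to
# wire add_gradients_summaries into a training loop is to hand it the
# (gradient, variable) pairs produced by an optimizer's compute_gradients.
# The optimizer choice and the `loss` tensor below are assumptions.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
grads_and_vars = optimizer.compute_gradients(loss)  # loss: scalar Tensor, assumed defined
grad_summaries = add_gradients_summaries(grads_and_vars)  # histogram + norm per variable
train_op = optimizer.apply_gradients(grads_and_vars)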
def add_gan_model_summaries(gan_model):
  """Adds typical GANModel summaries.

  Args:
    gan_model: A GANModel tuple.
  """
  with ops.name_scope('generator_variables'):
    for var in gan_model.generator_variables:
      summary.histogram(var.name, var)
  with ops.name_scope('discriminator_variables'):
    for var in gan_model.discriminator_variables:
      summary.histogram(var.name, var)
def linear_logit_fn(features):
  """Linear model logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.

  Returns:
    A `Tensor` representing the logits.
  """
  if feature_column_v2.is_feature_column_v2(feature_columns):
    shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
    linear_model = feature_column_v2.LinearModel(
        feature_columns=feature_columns,
        units=units,
        sparse_combiner=sparse_combiner,
        shared_state_manager=shared_state_manager)
    logits = linear_model(features)
    bias = linear_model.bias_variable

    # We'd like to get all the non-bias variables associated with this
    # LinearModel. This includes the shared embedding variables as well.
    variables = linear_model.variables
    variables.remove(bias)
    variables.extend(shared_state_manager.variables)

    # Expand (potential) Partitioned variables
    bias = _get_expanded_variable_list([bias])
    variables = _get_expanded_variable_list(variables)
  else:
    linear_model = feature_column._LinearModel(  # pylint: disable=protected-access
        feature_columns=feature_columns,
        units=units,
        sparse_combiner=sparse_combiner,
        name='linear_model')
    logits = linear_model(features)
    cols_to_vars = linear_model.cols_to_vars()
    bias = cols_to_vars.pop('bias')
    variables = cols_to_vars.values()

  if units > 1:
    summary.histogram('bias', bias)
  else:
    # If units == 1, the bias value is a length-1 list of a scalar Tensor,
    # so we should provide a scalar summary.
    summary.scalar('bias', bias[0][0])
  summary.scalar('fraction_of_zero_weights',
                 _compute_fraction_of_zero(variables))
  return logits
def testMergeSummary(self):
  with self.cached_session() as sess:
    const = constant_op.constant(10.0)
    summ1 = summary.histogram("h", const)
    summ2 = logging_ops.scalar_summary("c", const)
    merge = summary.merge([summ1, summ2])
    value = sess.run(merge)
  self.assertEqual([], merge.get_shape())
  self.assertProtoEquals("""
    value {
      tag: "h"
      histo {
        min: 10.0
        max: 10.0
        num: 1.0
        sum: 10.0
        sum_squares: 100.0
        bucket_limit: 9.93809490288
        bucket_limit: 10.9319043932
        bucket_limit: 1.7976931348623157e+308
        bucket: 0.0
        bucket: 1.0
        bucket: 0.0
      }
    }
    value {
      tag: "c"
      simple_value: 10.0
    }
  """, self._AsSummary(value))
def rnn_logit_fn(features, mode):
  """Recurrent Neural Network logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits.
  """
  with variable_scope.variable_scope(
      'sequence_input_layer',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    sequence_input, sequence_length = seq_fc.sequence_input_layer(
        features=features, feature_columns=sequence_feature_columns)
    summary.histogram('sequence_length', sequence_length)

    if context_feature_columns:
      context_input = feature_column_lib.input_layer(
          features=features, feature_columns=context_feature_columns)
      sequence_input = seq_fc.concatenate_context_input(
          context_input, sequence_input)

  cell = rnn_cell_fn(mode)
  # Ignore output state.
  rnn_outputs, _ = rnn.dynamic_rnn(
      cell=cell,
      inputs=sequence_input,
      sequence_length=sequence_length,
      dtype=dtypes.float32,
      time_major=False)
  last_activations = _select_last_activations(rnn_outputs, sequence_length)

  with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
    logits = core_layers.dense(
        last_activations,
        units=output_units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer())
  return logits
def _make_histogram_ops(self, model):
  """Defines histogram ops when histogram_freq > 0."""
  # only make histogram summary op if it hasn't already been made
  if self.histogram_freq and self.merged is None:
    for layer in self.model.layers:
      for weight in layer.weights:
        mapped_weight_name = weight.name.replace(':', '_')
        tf_summary.histogram(mapped_weight_name, weight)
        if self.write_images:
          w_img = array_ops.squeeze(weight)
          shape = K.int_shape(w_img)
          if len(shape) == 2:  # dense layer kernel case
            if shape[0] > shape[1]:
              w_img = array_ops.transpose(w_img)
              shape = K.int_shape(w_img)
            w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
          elif len(shape) == 3:  # convnet case
            if K.image_data_format() == 'channels_last':
              # switch to channels_first to display
              # every kernel as a separate image
              w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
              shape = K.int_shape(w_img)
            w_img = array_ops.reshape(w_img,
                                      [shape[0], shape[1], shape[2], 1])
          elif len(shape) == 1:  # bias case
            w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
          else:
            # not possible to handle 3D convnets etc.
            continue

          shape = K.int_shape(w_img)
          assert len(shape) == 4 and shape[-1] in [1, 3, 4]
          tf_summary.image(mapped_weight_name, w_img)

      if self.write_grads:
        for weight in layer.trainable_weights:
          mapped_weight_name = weight.name.replace(':', '_')
          grads = model.optimizer.get_gradients(model.total_loss, weight)

          def is_indexed_slices(grad):
            return type(grad).__name__ == 'IndexedSlices'

          grads = [
              grad.values if is_indexed_slices(grad) else grad
              for grad in grads
          ]
          tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)

      if hasattr(layer, 'output'):
        if isinstance(layer.output, list):
          for i, output in enumerate(layer.output):
            tf_summary.histogram('{}_out_{}'.format(layer.name, i), output)
        else:
          tf_summary.histogram('{}_out'.format(layer.name), layer.output)
def add_gan_model_summaries(gan_model):
  """Adds typical GANModel summaries.

  Args:
    gan_model: A GANModel tuple.
  """
  if isinstance(gan_model, namedtuples.CycleGANModel):
    with ops.name_scope('cyclegan_x2y_summaries'):
      add_gan_model_summaries(gan_model.model_x2y)
    with ops.name_scope('cyclegan_y2x_summaries'):
      add_gan_model_summaries(gan_model.model_y2x)
    return

  with ops.name_scope('generator_variables'):
    for var in gan_model.generator_variables:
      summary.histogram(var.name, var)
  with ops.name_scope('discriminator_variables'):
    for var in gan_model.discriminator_variables:
      summary.histogram(var.name, var)
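# Illustrative sketch (assumed usage): given a GANModel tuple built elsewhere
# (for example by tf.contrib.gan), a single call adds histograms for every
# generator and discriminator variable; CycleGANModel tuples are handled by
# the recursive branch above. `gan_model` is assumed to be defined.
add_gan_model_summaries(gan_model)       # histograms for all G/D variables
merged_summaries = summary.merge_all()   # collect them with any other summaries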
def testHistogramSummary(self):
  with self.cached_session() as s:
    i = array_ops.ones((5, 4, 4, 3))
    with ops.name_scope('outer'):
      summ_op = summary_lib.histogram('inner', i)
    summary_str = s.run(summ_op)
  summary = summary_pb2.Summary()
  summary.ParseFromString(summary_str)
  self.assertEqual(len(summary.value), 1)
  self.assertEqual(summary.value[0].tag, 'outer/inner')
def linear_regression(x, y, init_mean=None, init_stddev=1.0):
  """Creates linear regression TensorFlow subgraph.

  Args:
    x: tensor or placeholder for input features.
    y: tensor or placeholder for labels.
    init_mean: the mean value to use for initialization.
    init_stddev: the standard deviation to use for initialization.

  Returns:
    Predictions and loss tensors.

  Side effects:
    The variables linear_regression.weights and linear_regression.bias are
    initialized as follows. If init_mean is not None, then initialization
    will be done using a random normal initializer with the given init_mean
    and init_stddev. (These may be set to 0.0 each if a zero initialization
    is desirable for convex use cases.) If init_mean is None, then the
    uniform_unit_scaling_initializer will be used.
  """
  with vs.variable_scope('linear_regression'):
    scope_name = vs.get_variable_scope().name
    summary.histogram('%s.x' % scope_name, x)
    summary.histogram('%s.y' % scope_name, y)
    dtype = x.dtype.base_dtype
    y_shape = y.get_shape()
    if len(y_shape) == 1:
      output_shape = 1
    else:
      output_shape = y_shape[1]
    # Set up the requested initialization.
    if init_mean is None:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], output_shape], dtype=dtype)
      bias = vs.get_variable('bias', [output_shape], dtype=dtype)
    else:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], output_shape],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
      bias = vs.get_variable(
          'bias', [output_shape],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
    summary.histogram('%s.weights' % scope_name, weights)
    summary.histogram('%s.bias' % scope_name, bias)
    return losses_ops.mean_squared_error_regressor(x, y, weights, bias)
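# Illustrative sketch (assumed usage): the builder above returns
# (predictions, loss) and records histograms of x, y, weights and bias under
# the 'linear_regression' scope. The placeholder shapes below are assumptions.
x = array_ops.placeholder(dtypes.float32, shape=[None, 10])
y = array_ops.placeholder(dtypes.float32, shape=[None, 1])
predictions, loss = linear_regression(x, y, init_mean=0.0, init_stddev=0.01)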
def add_histogram_summary(tensor, name=None, prefix=None):
  """Adds a histogram summary for the given tensor.

  Args:
    tensor: A variable or op tensor.
    name: The optional name for the summary.
    prefix: An optional prefix for the summary names.

  Returns:
    A scalar `Tensor` of type `string` whose contents are the serialized
    `Summary` protocol buffer.
  """
  return summary.histogram(
      _get_summary_name(tensor, name, prefix), tensor)
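# Illustrative sketch (assumed usage): name and prefix are optional, so the
# helper can be called with just a tensor. `activations` is a hypothetical
# tensor defined elsewhere.
add_histogram_summary(activations)                               # tag derived from the op name
add_histogram_summary(activations, name='relu1', prefix='eval')  # explicit tag and prefix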
def generate_run(self, run_name):
  if run_name == self._RUN_WITH_HISTOGRAM:
    (use_histogram, use_scalars) = (True, False)
  elif run_name == self._RUN_WITH_SCALARS:
    (use_histogram, use_scalars) = (False, True)
  else:
    assert False, 'Invalid run name: %r' % run_name
  ops.reset_default_graph()
  sess = session.Session()
  placeholder = array_ops.placeholder(dtypes.float32, shape=[3])
  if use_histogram:
    summary.histogram(self._HISTOGRAM_TAG, placeholder)
  if use_scalars:
    summary.scalar(self._SCALAR_TAG, math_ops.reduce_mean(placeholder))
  summ = summary.merge_all()

  subdir = os.path.join(self.logdir, run_name)
  writer = summary.FileWriter(subdir)
  writer.add_graph(sess.graph)
  for step in xrange(self._STEPS):
    feed_dict = {placeholder: [1 + step, 2 + step, 3 + step]}
    s = sess.run(summ, feed_dict=feed_dict)
    writer.add_summary(s, global_step=step)
  writer.close()
def _add_histogram_summary(tensor, tag=None):
  """Add a summary operation for the histogram of a tensor.

  Args:
    tensor: The tensor to summarize.
    tag: The tag to use, if None then use tensor's op's name.

  Returns:
    The created histogram summary.

  Raises:
    ValueError: If the tag is already in use.
  """
  tag = tag or '%s_summary' % tensor.op.name
  return summary.histogram(tag, tensor)
def testClassicSummaryOpsErrorOut(self):
  x = constant_op.constant(42)
  x_summary = summary.scalar('x', x)
  y = constant_op.constant([1, 3, 3, 7])
  y_summary = summary.histogram('hist', y)

  with self.assertRaisesRegexp(
      RuntimeError,
      r'Merging tf\.summary\.\* ops is not compatible with eager execution'):
    summary.merge([x_summary, y_summary])
  with self.assertRaisesRegexp(
      RuntimeError,
      r'Merging tf\.summary\.\* ops is not compatible with eager execution'):
    summary.merge_all()
def testMergeAllSummaries(self):
  with ops.Graph().as_default():
    const = constant_op.constant(10.0)
    summ1 = summary.histogram("h", const)
    summ2 = summary.scalar("o", const, collections=["foo_key"])
    summ3 = summary.scalar("c", const)

    merge = summary.merge_all()
    self.assertEqual("MergeSummary", merge.op.type)
    self.assertEqual(2, len(merge.op.inputs))
    self.assertEqual(summ1, merge.op.inputs[0])
    self.assertEqual(summ3, merge.op.inputs[1])

    merge = summary.merge_all("foo_key")
    self.assertEqual("MergeSummary", merge.op.type)
    self.assertEqual(1, len(merge.op.inputs))
    self.assertEqual(summ2, merge.op.inputs[0])

    self.assertTrue(summary.merge_all("bar_key") is None)
def set_model(self, model):
  self.model = model
  self.sess = K.get_session()
  if self.histogram_freq and self.merged is None:
    for layer in self.model.layers:
      for weight in layer.weights:
        mapped_weight_name = weight.name.replace(':', '_')
        tf_summary.histogram(mapped_weight_name, weight)
        if self.write_images:
          w_img = array_ops.squeeze(weight)
          shape = K.int_shape(w_img)
          if len(shape) == 2:  # dense layer kernel case
            if shape[0] > shape[1]:
              w_img = array_ops.transpose(w_img)
              shape = K.int_shape(w_img)
            w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
          elif len(shape) == 3:  # convnet case
            if K.image_data_format() == 'channels_last':
              # switch to channels_first to display
              # every kernel as a separate image
              w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
              shape = K.int_shape(w_img)
            w_img = array_ops.reshape(w_img,
                                      [shape[0], shape[1], shape[2], 1])
          elif len(shape) == 1:  # bias case
            w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
          else:
            # not possible to handle 3D convnets etc.
            continue

          shape = K.int_shape(w_img)
          assert len(shape) == 4 and shape[-1] in [1, 3, 4]
          tf_summary.image(mapped_weight_name, w_img)

      if self.write_grads:
        for weight in layer.trainable_weights:
          mapped_weight_name = weight.name.replace(':', '_')
          grads = model.optimizer.get_gradients(model.total_loss, weight)

          def is_indexed_slices(grad):
            return type(grad).__name__ == 'IndexedSlices'

          grads = [grad.values if is_indexed_slices(grad) else grad
                   for grad in grads]
          tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)

      if hasattr(layer, 'output'):
        tf_summary.histogram('{}_out'.format(layer.name), layer.output)
  self.merged = tf_summary.merge_all()

  if self.write_graph:
    self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph)
  else:
    self.writer = tf_summary.FileWriter(self.log_dir)
def set_model(self, model): self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: tf_summary.histogram(weight.name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = w_img.get_shape() if len(shape) > 1 and shape[0] > shape[1]: w_img = array_ops.transpose(w_img) if len(shape) == 1: w_img = array_ops.expand_dims(w_img, 0) w_img = array_ops.expand_dims( array_ops.expand_dims(w_img, 0), -1) tf_summary.image(weight.name, w_img) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf_summary.FileWriter(self.log_dir) if self.embeddings_freq: self.saver = saver_lib.Saver() embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [ layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding' ] embeddings = { layer.name: layer.weights[0] for layer in self.model.layers if layer.name in embeddings_layer_names } embeddings_metadata = {} if not isinstance(self.embeddings_metadata, str): embeddings_metadata = self.embeddings_metadata else: embeddings_metadata = { layer_name: self.embeddings_metadata for layer_name in embeddings.keys() } config = projector.ProjectorConfig() self.embeddings_logs = [] for layer_name, tensor in embeddings.items(): embedding = config.embeddings.add() embedding.tensor_name = tensor.name self.embeddings_logs.append( os.path.join(self.log_dir, layer_name + '.ckpt')) if layer_name in embeddings_metadata: embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config)
def set_model(self, model): self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: tf_summary.histogram(weight.name, weight) if self.write_grads: grads = model.optimizer.get_gradients(model.total_loss, weight) tf_summary.histogram('{}_grad'.format(weight.name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(weight.name, w_img) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf_summary.FileWriter(self.log_dir) if self.embeddings_freq: embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [ layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding' ] embeddings = { layer.name: layer.weights[0] for layer in self.model.layers if layer.name in embeddings_layer_names } self.saver = saver_lib.Saver(list(embeddings.values())) embeddings_metadata = {} if not isinstance(self.embeddings_metadata, str): embeddings_metadata = self.embeddings_metadata else: embeddings_metadata = { layer_name: self.embeddings_metadata for layer_name in embeddings.keys() } config = projector.ProjectorConfig() self.embeddings_ckpt_path = os.path.join(self.log_dir, 'keras_embedding.ckpt') for layer_name, tensor in embeddings.items(): embedding = config.embeddings.add() embedding.tensor_name = tensor.name if layer_name in embeddings_metadata: embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config)
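# Illustrative sketch (assumed usage): the set_model variants above are the
# setup step of the Keras TensorBoard callback; in user code the histogram,
# image, gradient and embedding summaries they build are switched on through
# the callback constructor. The model and data names below are assumptions.
tensorboard = TensorBoard(log_dir='./logs', histogram_freq=1,
                          write_graph=True, write_grads=True,
                          write_images=True, embeddings_freq=1)
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=10, callbacks=[tensorboard])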
def optimize_loss(loss, global_step, learning_rate, optimizer, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None, summaries=None, colocate_gradients_with_ops=False, increment_global_step=True): """Given loss and parameters for optimizer, returns a training op. Various ways of passing optimizers include: - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES for full list. E.g. `optimize_loss(..., optimizer='Adam')`. - by function taking learning rate `Tensor` as argument and returning an `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`. Alternatively, if `learning_rate` is `None`, the function takes no arguments. E.g. `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`. - by a subclass of `Optimizer` having a single-argument constructor (the argument is the learning rate), such as AdamOptimizer or AdagradOptimizer. E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`. - by an instance of a subclass of `Optimizer`. E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`. Args: loss: Scalar `Tensor`. global_step: Scalar int `Tensor`, step counter to update on each step unless `increment_global_step` is `False`. If not supplied, it will be fetched from the default graph (see `tf.train.get_global_step` for details). If it has not been created, no step will be incremented with each weight update. `learning_rate_decay_fn` requires `global_step`. learning_rate: float or `Tensor`, magnitude of update per each training step. Can be `None`. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of `tf.Optimizer` that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of `tf.Optimizer` sub-class and have `compute_gradients` and `apply_gradients` functions. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float, callable or `None`. If float, is provided, a global clipping is applied to prevent the norm of the gradient to exceed this value. Alternatively, a callable can be provided e.g.: adaptive_clipping. This callable takes a `list` of `(gradients, variables)` `tuple`s and returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: `tf.train.exponential_decay`. Ignored if `learning_rate` is not supplied. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. The order of execution between `update_ops` and `loss` is non-deterministic. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. If not set only the loss and the learning rate will be reported. The complete list is in OPTIMIZER_SUMMARIES. 
colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing `global_step` more times than necessary. Returns: Training op. Raises: ValueError: if: * `loss` is an invalid type or shape. * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` has the wrong type. * `clip_gradients` is neither float nor callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. * `gradients` is empty. """ loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: global_step = contrib_framework.get_global_step() else: contrib_framework.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) # Make sure update ops are ran before computing loss. if update_ops: loss = control_flow_ops.with_dependencies(list(update_ops), loss) # Learning rate variable, with possible decay. lr = None if learning_rate is not None: if (isinstance(learning_rate, ops.Tensor) and learning_rate.get_shape().ndims == 0): lr = learning_rate elif isinstance(learning_rate, float): if learning_rate < 0.0: raise ValueError("Invalid learning_rate %s.", learning_rate) lr = vs.get_variable( "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)) else: raise ValueError( "Learning rate should be 0d Tensor or float. " "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))) if summaries is None: summaries = ["loss", "learning_rate", "global_gradient_norm"] else: for summ in summaries: if summ not in OPTIMIZER_SUMMARIES: raise ValueError( "Summaries should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_SUMMARIES), summ)) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError( "global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(lr, global_step) if "learning_rate" in summaries: summary.scalar("learning_rate", lr) # Create optimizer, given specified parameters. if isinstance(optimizer, six.string_types): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer) opt = optimizer(learning_rate=lr) elif isinstance(optimizer, optimizer_.Optimizer): opt = optimizer elif callable(optimizer): if learning_rate is not None: opt = optimizer(lr) else: opt = optimizer() if not isinstance(opt, optimizer_.Optimizer): raise ValueError( "Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." 
% str(opt)) else: raise ValueError( "Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." % str(optimizer)) # All trainable variables, if specific variables are not specified. if variables is None: variables = vars_.trainable_variables() # Compute gradients. gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops=colocate_gradients_with_ops) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers." ) if "global_gradient_norm" in summaries or "gradient_norm" in summaries: summary.scalar("global_norm/gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Optionally clip gradients by global norm. if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError("Unknown type %s for clip_gradients" % type(clip_gradients)) # Add scalar summary for loss. if "loss" in summaries: summary.scalar("loss", loss) # Add histograms for variables, gradients and gradient norms. for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, grad_values) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if clip_gradients is not None and ("global_gradient_norm" in summaries or "gradient_norm" in summaries): summary.scalar("global_norm/clipped_gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # Ensure the train_tensor computes grad_updates. train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) return train_tensor
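# Illustrative sketch (assumed usage): optimize_loss above accepts the
# optimizer by name and a summaries list; including "gradients" and
# "gradient_norm" adds per-variable gradient histograms and norms. `loss` and
# `global_step` are assumed to be defined elsewhere.
train_op = optimize_loss(
    loss,
    global_step=global_step,
    learning_rate=0.1,
    optimizer='Adagrad',
    clip_gradients=5.0,
    summaries=['loss', 'learning_rate', 'gradients', 'gradient_norm'])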
def optimize_loss(loss, learning_rate, optimizer, optimizer_params, global_step=None, dtype=tf.float32, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None, summaries=None, colocate_gradients_with_ops=False, increment_global_step=True, LARC_nu=None, LARC_mode='clip', loss_scale=1.0, automatic_loss_scaling=None, on_horovod=False): """Given loss and parameters for optimizer, returns a training op. Various ways of passing optimizers include: - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES for full list. E.g. `optimize_loss(..., optimizer='Adam')`. - by function taking learning rate `Tensor` as argument and returning an `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`. Alternatively, if `learning_rate` is `None`, the function takes no arguments. E.g. `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`. - by a subclass of `Optimizer` having a single-argument constructor (the argument is the learning rate), such as AdamOptimizer or AdagradOptimizer. E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`. - by an instance of a subclass of `Optimizer`. E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`. Args: loss: Scalar `Tensor`. global_step: Scalar int `Tensor`, step counter to update on each step unless `increment_global_step` is `False`. If not supplied, it will be fetched from the default graph (see `tf.train.get_global_step` for details). If it has not been created, no step will be incremented with each weight update. `learning_rate_decay_fn` requires `global_step`. learning_rate: float or `Tensor`, magnitude of update per each training step. Can be `None`. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of `tf.Optimizer` that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of `tf.Optimizer` sub-class and have `compute_gradients` and `apply_gradients` functions. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float, callable or `None`. If float, is provided, a global clipping is applied to prevent the norm of the gradient to exceed this value. Alternatively, a callable can be provided e.g.: adaptive_clipping. This callable takes a `list` of `(gradients, variables)` `tuple`s and returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: `tf.train.exponential_decay`. Ignored if `learning_rate` is not supplied. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. The order of execution between `update_ops` and `loss` is non-deterministic. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. 
If not set only the loss and the learning rate will be reported. The complete list is in OPTIMIZER_SUMMARIES. colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing `global_step` more times than necessary. LARC_mode: 'scale' or 'clip' LARC_nu: If not None, LARC re-scaling will be applied https://arxiv.org/pdf/1708.03888.pdf with nu=LARC_nu automatic_loss_scaling: if not None, use the corresponding automatic loss scaling algorithm. Must be one of 'Backoff' of 'LogMax'. `dtype` must be "mixed" to use ALS. Returns: Training op. Raises: ValueError: if: * `loss` is an invalid type or shape. * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` has the wrong type. * `clip_gradients` is neither float nor callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. * `gradients` is empty. """ loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: global_step = tf.train.get_or_create_global_step() else: tf.train.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) # Make sure update ops are ran before computing loss. if update_ops: loss = control_flow_ops.with_dependencies(list(update_ops), loss) # Learning rate variable, with possible decay. lr = None if learning_rate is not None: if isinstance(learning_rate, ops.Tensor) and \ learning_rate.get_shape().ndims == 0: lr = learning_rate elif isinstance(learning_rate, float): if learning_rate < 0.0: raise ValueError("Invalid learning_rate %s.", learning_rate) lr = vs.get_variable( "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)) else: raise ValueError( "Learning rate should be 0d Tensor or float. " "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))) if summaries is None: summaries = ["learning_rate", "global_gradient_norm"] else: for summ in summaries: if summ not in OPTIMIZER_SUMMARIES: raise ValueError( "Summaries should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_SUMMARIES), summ)) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError( "global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(lr, global_step) if "learning_rate" in summaries: summary.scalar("learning_rate", lr) # Create optimizer, given specified parameters. if isinstance(optimizer, six.string_types): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr, **optimizer_params) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is class (%s)." 
% optimizer) opt = optimizer(learning_rate=lr, **optimizer_params) elif isinstance(optimizer, optimizer_.Optimizer): opt = optimizer elif callable(optimizer): if learning_rate is not None: opt = optimizer(lr, **optimizer_params) else: opt = optimizer(**optimizer_params) if not isinstance(opt, optimizer_.Optimizer): raise ValueError( "Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." % str(opt)) else: raise ValueError( "Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." % str(optimizer)) if on_horovod: import horovod.tensorflow as hvd opt = hvd.DistributedOptimizer(opt) # All trainable variables, if specific variables are not specified. if variables is None: variables = vars_.trainable_variables() if automatic_loss_scaling is not None: if not automatic_loss_scaling in AutomaticLossScaler.SUPPORTED_ALGOS: raise ValueError( "Unknown automatic loss scaling algorithm: %s." % automatic_loss_sclaing) if dtype != "mixed": raise ValueError( "Automatic loss scaling can be used only with " "dtype=mixed.") loss_scaler = AutomaticLossScaler(algorithm=automatic_loss_scaling) else: loss_scaler = None if dtype == 'mixed': opt = MixedPrecisionOptimizerWrapper( opt, automatic_loss_scaler=loss_scaler, ) # Compute gradients. gradients = opt.compute_gradients( loss if loss_scale == 1.0 else loss * loss_scale, variables, colocate_gradients_with_ops=colocate_gradients_with_ops) if loss_scale != 1.0: gradients = _multiply_gradients_const(gradients, 1.0 / loss_scale) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers." ) if "global_gradient_norm" in summaries or "gradient_norm" in summaries: summary.scalar( "global_norm/gradient_norm", clip_ops.global_norm( list( map(lambda x: tf.cast(x, tf.float32), list(zip(*gradients))[0]))), ) # Optionally clip gradients by global norm. if clip_gradients is not None and LARC_nu is not None: raise AttributeError( "LARC and gradient norm clipping should not be used together") if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError("Unknown type %s for clip_gradients" % type(clip_gradients)) # Add histograms for variables, gradients and gradient norms. 
for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if isinstance(variable, ops.IndexedSlices): var_values = variable.values else: var_values = variable if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, mask_nans(grad_values)) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if "variables" in summaries: summary.histogram("variables/%s" % var_name, var_values) if "variable_norm" in summaries: summary.scalar("variable_norm/%s" % var_name, clip_ops.global_norm([var_values])) if clip_gradients is not None and ("global_gradient_norm" in summaries or "gradient_norm" in summaries): summary.scalar( "global_norm/clipped_gradient_norm", clip_ops.global_norm( list( map(lambda x: tf.cast(x, tf.float32), list(zip(*gradients))[0]))), ) # LARC gradient re-scaling if LARC_nu is not None and isinstance(LARC_nu, float): for idx, (g, v) in enumerate(gradients): var_dtype = v.dtype v_norm = tf.norm(tensor=tf.cast(v, tf.float32), ord=2) g_norm = tf.norm(tensor=tf.cast(g, tf.float32), ord=2) zero_const = tf.constant(0.0, dtype=tf.float32) # this condition is necessary for two reasons. # First, g_norm can be zero. In that case we can not use the usual # formula, but it does not matter what larc_local_lr is, since it will # be multiplied by g = 0. # Second, v_norm can be zero. In that case, we want to switch to the # usual gradient descent since the usual LARC update will always be 0. # Thus, we make larc_local_lr = 1e-5 so that # we move a little bit away from zero and can use LARC again larc_local_lr = tf.cond( pred=tf.logical_and( tf.greater(v_norm, zero_const), tf.greater(g_norm, zero_const), ), true_fn=lambda: LARC_nu * v_norm / g_norm, false_fn=lambda: tf.Print( tf.constant(1e-5, dtype=tf.float32), [], "g_norm = 0 or v_norm = 0 for {}".format(v.name)), ) if LARC_mode == 'clip': summary.scalar( 'switched_to_global/{}'.format(v.name), tf.cast(tf.greater(larc_local_lr, lr), tf.int32)) larc_local_lr = tf.cond( pred=tf.less(larc_local_lr, lr), true_fn=lambda: larc_local_lr / lr, false_fn=lambda: tf.constant(1.0, dtype=tf.float32), ) larc_local_lr = tf.saturate_cast(larc_local_lr, var_dtype) summary.scalar('larc_local_lr/{}'.format(v.name), larc_local_lr) summary.scalar('larc_effective_lr/{}'.format(v.name), larc_local_lr * tf.cast(lr, var_dtype)) gradients[idx] = (larc_local_lr * g, v) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # # Ensure the train_tensor computes grad_updates. train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) return train_tensor
def _add_hidden_layer_summary(value, tag):
  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
  summary.histogram('%s/activation' % tag, value)
def testHistogramSummaryTypes(self):
  for dtype in (dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.int32,
                dtypes.float32, dtypes.float64):
    const = constant_op.constant(10, dtype=dtype)
    summary_lib.histogram('h', const)
def testHistogramSummaryTypes(self):
  with ops.Graph().as_default():
    for dtype in (dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.int32,
                  dtypes.float32, dtypes.float64):
      const = constant_op.constant(10, dtype=dtype)
      summary.histogram("h", const)
def build_controller(self): """RL optimization interface. Returns: ops: A dictionary holding handles of the model used for training. """ self._global_step = training_util.get_or_create_global_step() ops = {} ops["loss"] = 0 failing_signal = self.compute_reward(self.hparams.failing_signal) ctr = {} with tf_ops.name_scope("controller_{}".format(self.ctrl_id)): with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)): ctr["reward"] = {"value": [], "ph": [], "update": []} ctr["ready"] = {"value": [], "ph": [], "update": []} ctr["best_reward"] = {"value": [], "update": []} for i in range(self.hparams.num_children): reward_value = variable_scope.get_local_variable( "reward_{}".format(i), initializer=0.0, dtype=dtypes.float32, trainable=False) reward_ph = array_ops.placeholder( dtypes.float32, shape=(), name="reward_ph_{}".format(i)) reward_update = state_ops.assign( reward_value, reward_ph, use_locking=True) ctr["reward"]["value"].append(reward_value) ctr["reward"]["ph"].append(reward_ph) ctr["reward"]["update"].append(reward_update) best_reward = variable_scope.get_local_variable( "best_reward_{}".format(i), initializer=failing_signal, dtype=dtypes.float32, trainable=False) ctr["best_reward"]["value"].append(best_reward) ctr["best_reward"]["update"].append( state_ops.assign(best_reward, math_ops.minimum(best_reward, reward_update))) ready_value = variable_scope.get_local_variable( "ready_{}".format(i), initializer=True, dtype=dtypes.bool, trainable=False) ready_ph = array_ops.placeholder( dtypes.bool, shape=(), name="ready_ph_{}".format(i)) ready_update = state_ops.assign( ready_value, ready_ph, use_locking=True) ctr["ready"]["value"].append(ready_value) ctr["ready"]["ph"].append(ready_ph) ctr["ready"]["update"].append(ready_update) ctr["grouping_y_preds"], ctr["grouping_log_probs"] = self.get_groupings() summary.histogram( "grouping_actions", array_ops.slice(ctr["grouping_y_preds"]["sample"], [0, 0], [1, array_ops.shape(self.op_embeddings)[0]])) with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)): ctr["baseline"] = variable_scope.get_local_variable( "baseline", initializer=failing_signal if self.hparams.start_with_failing_signal else 0.0, dtype=dtypes.float32, trainable=False) new_baseline = self.hparams.bl_dec * ctr["baseline"] + ( 1 - self.hparams.bl_dec) * math_ops.reduce_mean( ctr["reward"]["value"]) if not self.hparams.always_update_baseline: baseline_mask = math_ops.less(ctr["reward"]["value"], failing_signal) selected_reward = array_ops.boolean_mask(ctr["reward"]["value"], baseline_mask) selected_baseline = control_flow_ops.cond( math_ops.reduce_any(baseline_mask), lambda: math_ops.reduce_mean(selected_reward), lambda: constant_op.constant(0, dtype=dtypes.float32)) ctr["pos_reward"] = selected_baseline pos_ = math_ops.less( constant_op.constant(0, dtype=dtypes.float32), selected_baseline) selected_baseline = self.hparams.bl_dec * ctr["baseline"] + ( 1 - self.hparams.bl_dec) * selected_baseline selected_baseline = control_flow_ops.cond( pos_, lambda: selected_baseline, lambda: ctr["baseline"]) new_baseline = control_flow_ops.cond( math_ops.less(self.global_step, self.hparams.stop_updating_after_steps), lambda: new_baseline, lambda: selected_baseline) ctr["baseline_update"] = state_ops.assign( ctr["baseline"], new_baseline, use_locking=True) ctr["y_preds"], ctr["log_probs"] = self.get_placements() summary.histogram("actions", ctr["y_preds"]["sample"]) mask = math_ops.less(ctr["reward"]["value"], failing_signal) ctr["loss"] = ctr["reward"]["value"] - 
ctr["baseline"] ctr["loss"] *= ( ctr["log_probs"]["sample"] + ctr["grouping_log_probs"]["sample"]) selected_loss = array_ops.boolean_mask(ctr["loss"], mask) selected_loss = control_flow_ops.cond( math_ops.reduce_any(mask), lambda: math_ops.reduce_mean(-selected_loss), lambda: constant_op.constant(0, dtype=dtypes.float32)) ctr["loss"] = control_flow_ops.cond( math_ops.less(self.global_step, self.hparams.stop_updating_after_steps), lambda: math_ops.reduce_mean(-ctr["loss"]), lambda: selected_loss) ctr["reward_s"] = math_ops.reduce_mean(ctr["reward"]["value"]) summary.scalar("loss", ctr["loss"]) summary.scalar("avg_reward", ctr["reward_s"]) summary.scalar("best_reward_so_far", best_reward) summary.scalar( "advantage", math_ops.reduce_mean(ctr["reward"]["value"] - ctr["baseline"])) with variable_scope.variable_scope( "optimizer", reuse=variable_scope.AUTO_REUSE): (ctr["train_op"], ctr["lr"], ctr["grad_norm"], ctr["grad_norms"]) = self._get_train_ops( ctr["loss"], tf_ops.get_collection(tf_ops.GraphKeys.TRAINABLE_VARIABLES), self.global_step, grad_bound=self.hparams.grad_bound, lr_init=self.hparams.lr, lr_dec=self.hparams.lr_dec, start_decay_step=self.hparams.start_decay_step, decay_steps=self.hparams.decay_steps, optimizer_type=self.hparams.optimizer_type) summary.scalar("gradnorm", ctr["grad_norm"]) summary.scalar("lr", ctr["lr"]) ctr["summary"] = summary.merge_all() ops["controller"] = ctr self.ops = ops return ops
def set_model(self, model): """Sets Keras model and creates summary ops.""" self.model = model self.sess = K.get_session() # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients(model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [grad.values if is_indexed_slices(grad) else grad for grad in grads] tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): if isinstance(layer.output, list): for i, output in enumerate(layer.output): tf_summary.histogram('{}_out_{}'.format(layer.name, i), output) else: tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = self._writer_class(self.log_dir, self.sess.graph) else: self.writer = self._writer_class(self.log_dir) # If both embedding_freq and embeddings_data are available, we will # visualize embeddings. if self.embeddings_freq and self.embeddings_data is not None: self.embeddings_data = standardize_input_data(self.embeddings_data, model.input_names) # If embedding_layer_names are not provided, get all of the embedding # layers from the model. 
embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [ layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding' ] self.assign_embeddings = [] embeddings_vars = {} self.batch_id = batch_id = array_ops.placeholder(dtypes.int32) self.step = step = array_ops.placeholder(dtypes.int32) for layer in self.model.layers: if layer.name in embeddings_layer_names: embedding_input = self.model.get_layer(layer.name).output embedding_size = np.prod(embedding_input.shape[1:]) embedding_input = array_ops.reshape(embedding_input, (step, int(embedding_size))) shape = (self.embeddings_data[0].shape[0], int(embedding_size)) embedding = variables.Variable( array_ops.zeros(shape), name=layer.name + '_embedding') embeddings_vars[layer.name] = embedding batch = state_ops.assign(embedding[batch_id:batch_id + step], embedding_input) self.assign_embeddings.append(batch) self.saver = saver.Saver(list(embeddings_vars.values())) # Create embeddings_metadata dictionary if isinstance(self.embeddings_metadata, str): embeddings_metadata = { layer_name: self.embeddings_metadata for layer_name in embeddings_vars.keys() } else: # If embedding_metadata is already a dictionary embeddings_metadata = self.embeddings_metadata try: from tensorboard.plugins import projector except ImportError: raise ImportError('Failed to import TensorBoard. Please make sure that ' 'TensorBoard integration is complete."') # TODO(psv): Add integration tests to test embedding visualization # with TensorBoard callback. We are unable to write a unit test for this # because TensorBoard dependency assumes TensorFlow package is installed. config = projector.ProjectorConfig() for layer_name, tensor in embeddings_vars.items(): embedding = config.embeddings.add() embedding.tensor_name = tensor.name if (embeddings_metadata is not None and layer_name in embeddings_metadata): embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config)
def _add_hidden_layer_summary(self, value, tag):
  # TODO(zakaria): Move this code to tf.learn and add test.
  summary.scalar("%s/fraction_of_zero_values" % tag, nn.zero_fraction(value))
  summary.histogram("%s/activation" % tag, value)
def _add_hidden_layer_summary(self, value, tag):
  summary.scalar('%s/fraction_of_zero_values' % tag,
                 tf.nn.zero_fraction(value))
  summary.histogram('%s/activation' % tag, value)
def _add_hidden_layer_summary(value, tag):
  summary.scalar("%s_fraction_of_zero_values" % tag, nn.zero_fraction(value))
  summary.histogram("%s_activation" % tag, value)
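# Illustrative sketch (assumed usage): the hidden-layer helper is typically
# called once per layer inside a DNN builder so each activation gets both a
# zero-fraction scalar and a histogram. `net` and the layer sizes below are
# assumptions for the example.
for layer_id, num_units in enumerate([256, 128, 64]):
  net = core_layers.dense(net, units=num_units, activation=nn.relu)
  _add_hidden_layer_summary(net, 'hiddenlayer_%d' % layer_id)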
def optimize_loss(loss, optimizer, optimizer_params, learning_rate_decay_fn, global_step=None, dtype=tf.float32, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, update_ops=None, variables=None, name=None, summaries=None, colocate_gradients_with_ops=False, increment_global_step=True, larc_params=None, loss_scale=1.0, automatic_loss_scaling=None, on_horovod=False): """Given loss and parameters for optimizer, returns a training op. Various ways of passing optimizers include: - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES for full list. E.g. `optimize_loss(..., optimizer='Adam')`. - by function taking learning rate `Tensor` as argument and returning an `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`. Alternatively, if `learning_rate` is `None`, the function takes no arguments. E.g. `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`. - by a subclass of `Optimizer` having a single-argument constructor (the argument is the learning rate), such as AdamOptimizer or AdagradOptimizer. E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`. - by an instance of a subclass of `Optimizer`. E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`. Args: loss: Scalar `Tensor`. global_step: Scalar int `Tensor`, step counter to update on each step unless `increment_global_step` is `False`. If not supplied, it will be fetched from the default graph (see `tf.train.get_global_step` for details). If it has not been created, no step will be incremented with each weight update. `learning_rate_decay_fn` requires `global_step`. learning_rate: float or `Tensor`, magnitude of update per each training step. Can be `None`. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of `tf.Optimizer` that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of `tf.Optimizer` sub-class and have `compute_gradients` and `apply_gradients` functions. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float, callable or `None`. If float, is provided, a global clipping is applied to prevent the norm of the gradient to exceed this value. Alternatively, a callable can be provided e.g.: adaptive_clipping. This callable takes a `list` of `(gradients, variables)` `tuple`s and returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: `tf.train.exponential_decay`. Ignored if `learning_rate` is not supplied. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. The order of execution between `update_ops` and `loss` is non-deterministic. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. If not set only the loss and the learning rate will be reported. 
The complete list is in OPTIMIZER_SUMMARIES. colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing `global_step` more times than necessary. LARC_mode: 'scale' or 'clip' LARC_nu: If not None, LARC re-scaling will be applied https://arxiv.org/pdf/1708.03888.pdf with nu=LARC_nu automatic_loss_scaling: if not None, use the corresponding automatic loss scaling algorithm. Must be one of 'Backoff' of 'LogMax'. `dtype` must be "mixed" to use ALS. Returns: Training op. Raises: ValueError: if: * `loss` is an invalid type or shape. * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` has the wrong type. * `clip_gradients` is neither float nor callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. * `gradients` is empty. """ loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: global_step = tf.train.get_or_create_global_step() else: tf.train.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) # Make sure update ops are ran before computing loss. if update_ops: loss = control_flow_ops.with_dependencies(list(update_ops), loss) if summaries is None: summaries = ["learning_rate", "global_gradient_norm"] else: for summ in summaries: if summ not in OPTIMIZER_SUMMARIES: raise ValueError("Summaries should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_SUMMARIES), summ)) if global_step is None: raise ValueError("global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(global_step) if "learning_rate" in summaries: summary.scalar("learning_rate", lr) # Create optimizer, given specified parameters. if isinstance(optimizer, six.string_types): if lr is None: raise ValueError("Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr, **optimizer_params) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError("Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer) opt = optimizer(learning_rate=lr, **optimizer_params) elif isinstance(optimizer, optimizer_.Optimizer): opt = optimizer elif callable(optimizer): if lr is not None: opt = optimizer(lr, **optimizer_params) else: opt = optimizer(**optimizer_params) if not isinstance(opt, optimizer_.Optimizer): raise ValueError("Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." % str(opt)) else: raise ValueError("Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." % str(optimizer)) # All trainable variables, if specific variables are not specified. 
if variables is None: variables = vars_.trainable_variables() if automatic_loss_scaling is not None: if automatic_loss_scaling not in AutomaticLossScaler.SUPPORTED_ALGOS: raise ValueError("Unknown automatic loss scaling algorithm: %s." % automatic_loss_scaling) if dtype != "mixed": raise ValueError("Automatic loss scaling can be used only with " "dtype=mixed.") loss_scale = AutomaticLossScaler(algorithm=automatic_loss_scaling) if dtype == 'mixed': opt = MixedPrecisionOptimizerWrapper(opt, loss_scale=loss_scale) if on_horovod: opt = DistributedOptimizer(opt) # Compute gradients. gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops=colocate_gradients_with_ops, ) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers.") if "global_gradient_norm" in summaries or "gradient_norm" in summaries: summary.scalar( "global_norm/gradient_norm", clip_ops.global_norm(list(map( lambda x: tf.cast(x, tf.float32), list(zip(*gradients))[0]) )), ) # Optionally clip gradients by global norm. if clip_gradients is not None and larc_params is not None: raise AttributeError( "LARC and gradient norm clipping should not be used together" ) if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError( "Unknown type %s for clip_gradients" % type(clip_gradients)) # Add histograms for variables, gradients and gradient norms. 
for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if isinstance(variable, ops.IndexedSlices): var_values = variable.values else: var_values = variable if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, mask_nans(grad_values)) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if "variables" in summaries: summary.histogram("variables/%s" % var_name, var_values) if "variable_norm" in summaries: summary.scalar("variable_norm/%s" % var_name, clip_ops.global_norm([var_values])) if clip_gradients is not None and ("global_gradient_norm" in summaries or "gradient_norm" in summaries): summary.scalar( "global_norm/clipped_gradient_norm", clip_ops.global_norm(list(map( lambda x: tf.cast(x, tf.float32), list(zip(*gradients))[0]) )), ) # LARC gradient re-scaling if larc_params is not None: check_params( config=larc_params, required_dict={'larc_eta': float}, optional_dict={ 'larc_mode': ['clip', 'scale'], 'min_update': float, 'epsilon': float }, ) larc_eta = larc_params['larc_eta'] larc_mode = larc_params.get('larc_mode', 'clip') min_update = larc_params.get('min_update', 1e-7) eps = larc_params.get('epsilon', 1e-7) for idx, (g, v) in enumerate(gradients): var_dtype = v.dtype v_norm = tf.norm(tensor=tf.cast(v, tf.float32), ord=2) g_norm = tf.norm(tensor=tf.cast(g, tf.float32), ord=2) if larc_mode == 'clip': larc_grad_update = tf.maximum( larc_eta * v_norm / (lr * (g_norm + eps)), min_update, ) if "larc_summaries" in summaries: summary.scalar('larc_clip_on/{}'.format(v.name), tf.cast(tf.less(larc_grad_update, 1.0), tf.int32)) larc_grad_update = tf.minimum(larc_grad_update, 1.0) else: larc_grad_update = tf.maximum( larc_eta * v_norm / (g_norm + eps), min_update, ) larc_grad_update = tf.saturate_cast(larc_grad_update, var_dtype) gradients[idx] = (larc_grad_update * g, v) # Add additional LARC summaries. if "larc_summaries" in summaries: summary.scalar('larc_grad_update/{}'.format(v.name), larc_grad_update) summary.scalar("larc_final_lr/{}".format(v.name), tf.cast(lr, var_dtype) * larc_grad_update) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # Ensure the train_tensor computes grad_updates. train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) return train_tensor
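# --- Usage sketch (added for illustration; not part of the original source). ---
# How the optimize_loss variant above might be driven. `model_loss` is a
# placeholder name for a scalar loss built elsewhere, 'Momentum' is assumed to
# be a key of OPTIMIZER_CLS_NAMES, and the hyper-parameter values are
# illustrative only. Passing dtype="mixed" (plus automatic_loss_scaling) would
# additionally enable the mixed-precision and loss-scaling wrappers.
import tensorflow as tf

def _lr_decay_fn(global_step):
  # Returns the learning-rate Tensor for the current step.
  return tf.train.exponential_decay(
      learning_rate=0.1, global_step=global_step,
      decay_steps=10000, decay_rate=0.9)

train_op = optimize_loss(
    loss=model_loss,  # assumed scalar loss Tensor
    optimizer='Momentum',
    optimizer_params={'momentum': 0.9},
    learning_rate_decay_fn=_lr_decay_fn,
    larc_params={'larc_eta': 0.002, 'larc_mode': 'clip'},
    summaries=['learning_rate', 'global_gradient_norm'])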
def set_model(self, model): self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_grads: grads = model.optimizer.get_gradients(model.total_loss, weight) tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf_summary.FileWriter(self.log_dir) if self.embeddings_freq: embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [ layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding' ] embeddings = { layer.name: layer.weights[0] for layer in self.model.layers if layer.name in embeddings_layer_names } self.saver = saver_lib.Saver(list(embeddings.values())) embeddings_metadata = {} if not isinstance(self.embeddings_metadata, str): embeddings_metadata = self.embeddings_metadata else: embeddings_metadata = { layer_name: self.embeddings_metadata for layer_name in embeddings.keys() } config = projector.ProjectorConfig() self.embeddings_ckpt_path = os.path.join(self.log_dir, 'keras_embedding.ckpt') for layer_name, tensor in embeddings.items(): embedding = config.embeddings.add() embedding.tensor_name = tensor.name if layer_name in embeddings_metadata: embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config)
def optimize_loss(loss, global_step, learning_rate, optimizer, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None, summaries=None, colocate_gradients_with_ops=False, increment_global_step=True): """Given loss and parameters for optimizer, returns a training op. Various ways of passing optimizers include: - string, name of the optimizer like 'SGD', 'Adam', see OPTIMIZER_CLS_NAMES for full list. E.g. `optimize_loss(..., optimizer='Adam')`. - function, takes learning rate `Tensor` as argument and must return `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`. Alternatively, if `learning_rate` is `None`, the function takes no arguments. E.g. `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`. - class, subclass of `Optimizer` that takes only one required argument - learning rate, such as AdamOptimizer, AdagradOptimizer. E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`. - object, instance of subclass of `Optimizer`. E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`. Args: loss: Scalar `Tensor`. global_step: Scalar int `Tensor`, step counter to update on each step unless `increment_global_step` is `False`. If not supplied, it will be fetched from the default graph (see `tf.train.get_global_step` for details). If it has not been created, no step will be incremented with each weight update. `learning_rate_decay_fn` requires `global_step`. learning_rate: float or `Tensor`, magnitude of update per each training step. Can be `None`. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of `tf.Optimizer` that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of `tf.Optimizer` sub-class and have `compute_gradients` and `apply_gradients` functions. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float, callable or `None`. If a float is provided, global clipping is applied to prevent the norm of the gradients from exceeding this value. Alternatively, a callable can be provided, e.g. adaptive_clipping. This callable takes a `list` of `(gradients, variables)` `tuple`s and returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: `tf.train.exponential_decay`. Ignored if `learning_rate` is not supplied. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. The order of execution between `update_ops` and `loss` is non-deterministic. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. If not set only the loss and the learning rate will be reported. The complete list is in OPTIMIZER_SUMMARIES. 
colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing `global_step` more times than necessary. Returns: Training op. Raises: ValueError: if: * `loss` is an invalid type or shape. * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` has the wrong type. * `clip_gradients` is neither float nor callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. * `gradients` is empty. """ loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: global_step = contrib_framework.get_global_step() else: contrib_framework.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) # Make sure update ops are run before computing loss. if update_ops: loss = control_flow_ops.with_dependencies(list(update_ops), loss) # Learning rate variable, with possible decay. lr = None if learning_rate is not None: if (isinstance(learning_rate, ops.Tensor) and learning_rate.get_shape().ndims == 0): lr = learning_rate elif isinstance(learning_rate, float): if learning_rate < 0.0: raise ValueError("Invalid learning_rate %s." % learning_rate) lr = vs.get_variable( "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)) else: raise ValueError("Learning rate should be 0d Tensor or float. " "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))) if summaries is None: summaries = ["loss", "learning_rate"] else: for summ in summaries: if summ not in OPTIMIZER_SUMMARIES: raise ValueError("Summaries should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_SUMMARIES), summ)) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError("global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(lr, global_step) if "learning_rate" in summaries: summary.scalar("learning_rate", lr) # Create optimizer, given specified parameters. if isinstance(optimizer, six.string_types): if lr is None: raise ValueError("Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError("Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer) opt = optimizer(learning_rate=lr) elif isinstance(optimizer, optimizer_.Optimizer): opt = optimizer elif callable(optimizer): if learning_rate is not None: opt = optimizer(lr) else: opt = optimizer() if not isinstance(opt, optimizer_.Optimizer): raise ValueError("Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." % str(opt)) else: raise ValueError("Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." 
% str(optimizer)) # All trainable variables, if specific variables are not specified. if variables is None: variables = vars_.trainable_variables() # Compute gradients. gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops=colocate_gradients_with_ops) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers.") if "gradient_norm" in summaries: summary.scalar("global_norm/gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Optionally clip gradients by global norm. if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError( "Unknown type %s for clip_gradients" % type(clip_gradients)) # Add scalar summary for loss. if "loss" in summaries: summary.scalar("loss", loss) # Add histograms for variables, gradients and gradient norms. for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, grad_values) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if clip_gradients is not None and "gradient_norm" in summaries: summary.scalar("global_norm/clipped_gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # Ensure the train_tensor computes grad_updates. train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) return train_tensor
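# --- Usage sketch (added for illustration; not part of the original source). ---
# Calling the contrib-style optimize_loss above end to end: a tiny regression
# graph, a string optimizer, exponential learning-rate decay and global-norm
# clipping. The model and hyper-parameter values are illustrative only.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4], name='x')
y = tf.placeholder(tf.float32, [None], name='y')
preds = tf.squeeze(tf.layers.dense(x, 1), axis=1)
total_loss = tf.losses.mean_squared_error(labels=y, predictions=preds)

train_op = optimize_loss(
    loss=total_loss,
    global_step=tf.train.get_or_create_global_step(),
    learning_rate=0.01,
    optimizer='Adam',
    clip_gradients=5.0,
    learning_rate_decay_fn=lambda lr, step: tf.train.exponential_decay(
        lr, step, decay_steps=10000, decay_rate=0.96),
    summaries=['loss', 'learning_rate', 'gradient_norm'])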
def set_model(self, model): """Sets Keras model and creates summary ops.""" self.model = model self.sess = K.get_session() # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients( model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads ] tf_summary.histogram( '{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): if isinstance(layer.output, list): for i, output in enumerate(layer.output): tf_summary.histogram( '{}_out_{}'.format(layer.name, i), output) else: tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = self._writer_class(self.log_dir, self.sess.graph) else: self.writer = self._writer_class(self.log_dir) # If both embedding_freq and embeddings_data are available, we will # visualize embeddings. if self.embeddings_freq and self.embeddings_data is not None: self.embeddings_data = standardize_input_data( self.embeddings_data, model.input_names) # If embedding_layer_names are not provided, get all of the embedding # layers from the model. 
embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [ layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding' ] self.assign_embeddings = [] embeddings_vars = {} self.batch_id = batch_id = array_ops.placeholder(dtypes.int32) self.step = step = array_ops.placeholder(dtypes.int32) for layer in self.model.layers: if layer.name in embeddings_layer_names: embedding_input = self.model.get_layer(layer.name).output embedding_size = np.prod(embedding_input.shape[1:]) embedding_input = array_ops.reshape( embedding_input, (step, int(embedding_size))) shape = (self.embeddings_data[0].shape[0], int(embedding_size)) embedding = variables.Variable(array_ops.zeros(shape), name=layer.name + '_embedding') embeddings_vars[layer.name] = embedding batch = state_ops.assign( embedding[batch_id:batch_id + step], embedding_input) self.assign_embeddings.append(batch) self.saver = saver.Saver(list(embeddings_vars.values())) # Create embeddings_metadata dictionary if isinstance(self.embeddings_metadata, str): embeddings_metadata = { layer_name: self.embeddings_metadata for layer_name in embeddings_vars.keys() } else: # If embedding_metadata is already a dictionary embeddings_metadata = self.embeddings_metadata try: from tensorboard.plugins import projector except ImportError: raise ImportError( 'Failed to import TensorBoard. Please make sure that ' 'TensorBoard integration is complete.') # TODO(psv): Add integration tests to test embedding visualization # with TensorBoard callback. We are unable to write a unit test for this # because TensorBoard dependency assumes TensorFlow package is installed. config = projector.ProjectorConfig() for layer_name, tensor in embeddings_vars.items(): embedding = config.embeddings.add() embedding.tensor_name = tensor.name if (embeddings_metadata is not None and layer_name in embeddings_metadata): embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config)
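# --- Usage sketch (added for illustration; not part of the original source). ---
# Configuration that reaches the embedding-visualization branch above, assuming
# the enclosing class is the Keras TensorBoard callback this set_model belongs
# to. Unlike older variants in this file, this one requires `embeddings_data`;
# `model`, `x_train`, `y_train` and `x_val` are placeholders, and the layer
# name and metadata file are illustrative.
tb = TensorBoard(
    log_dir='./logs',
    embeddings_freq=1,
    embeddings_layer_names=['embedding_1'],
    embeddings_metadata='metadata.tsv',
    embeddings_data=x_val)
model.fit(x_train, y_train, epochs=5, callbacks=[tb])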
def optimize_loss(losses, global_step, learning_rate, optimizer, num_gpus=1, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None, summaries=["global_gradient_norm"], colocate_gradients_with_ops=False, increment_global_step=True, use_tpu=False, use_horovod=False): """Given losses and parameters for optimizer, returns a training op. Args: losses: list of scalar loss `Tensor`s, one per GPU tower (a single-element list when num_gpus == 1). global_step: Tensor, step counter for each update. learning_rate: float or Tensor, magnitude of update per each training step. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of tf.Optimizer that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of tf.Optimizer sub-class and have `compute_gradients` and `apply_gradients` functions. num_gpus: number of GPU towers; when greater than 1, per-tower gradients are computed and averaged before being applied. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float or `None`, clips gradients by this value. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: tf.train.exponential_decay. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. Defaults to ["global_gradient_norm"]. The complete list is in OPTIMIZER_SUMMARIES. use_tpu: whether to wrap the optimizer in tf.contrib.tpu.CrossShardOptimizer. use_horovod: whether to wrap the optimizer in Horovod's DistributedOptimizer; cannot be combined with use_tpu. Returns: Training op. Raises: ValueError: if optimizer has the wrong type. """ with vs.variable_scope(name, "OptimizeLoss", losses + [global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) #--from https://github.com/tensorflow/tensorflow/blob/28c3c5dd38e3b397c2cf0acdaa6388dcbf0349f7/tensorflow/contrib/layers/python/layers/optimizers.py # Learning rate variable, with possible decay. lr = None if learning_rate is not None: if ((isinstance(learning_rate, ops.Tensor) or isinstance(learning_rate, tf.Variable)) and learning_rate.get_shape().ndims == 0): #print('------------------optimize_loss learning rate does nothing', learning_rate) lr = learning_rate elif isinstance(learning_rate, float): if learning_rate < 0.0: raise ValueError("Invalid learning_rate %s." % learning_rate) lr = vs.get_variable( "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)) else: raise ValueError( "Learning rate should be 0d Tensor or float. " "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError( "global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(lr, global_step) # Create optimizer, given specified parameters. 
if isinstance(optimizer, six.string_types): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer) opt = optimizer(learning_rate=lr) elif isinstance(optimizer, optimizer_.Optimizer): #print('------------------optimize_loss optimizer do nothing', optimizer) opt = optimizer elif callable(optimizer): if learning_rate is not None: opt = optimizer(lr) else: opt = optimizer() if not isinstance(opt, optimizer_.Optimizer): pass # TODO: support all tf.keras.optimizers #raise ValueError("Unrecognized optimizer: function should return " # "subclass of Optimizer. Got %s." % str(opt)) else: raise ValueError( "Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." % str(optimizer)) if use_tpu: opt = tf.contrib.tpu.CrossShardOptimizer(opt) assert not (use_tpu and use_horovod) if use_horovod: if FLAGS.torch: import horovod.torch as hvd else: import horovod.tensorflow as hvd #https://blog.csdn.net/qq_16234613/article/details/96186398 # we enable compression only for fp16 #import horovod.tensorflow as hvd #from horovod.tensorflow.compression import Compression # if use_fp16: # compression = Compression.fp16 # else: # compression = Compression.none # opt = hvd.DistributedOptimizer(opt, sparse_as_dense=True, # compression=compression) opt = hvd.DistributedOptimizer(opt) ## Just using opt.minimize (0.1 epoch, 64 * 8 GPUs) gave AUC 0.717; commenting it out and going through the explicit gradients path below gave AUC 0.7186. #return opt.minimize(losses[0], global_step=global_step if increment_global_step else None) if num_gpus > 1: # Calculate the gradients for each model tower. # TODO: check that the code below is correct; right now a single GPU going through this path is slower than tf.contrib.layers.optimize_loss (4.5 batch/s -> 3 batch/s). tower_grads = [] for i in range(num_gpus): with tf.device('/gpu:%d' % i): with tf.name_scope('%s_%d' % ('tower', i)) as name_scope: # All trainable variables, if specific variables are not specified. #-- TODO: does trainable_variables affect speed? if variables is None: variables = vars_.trainable_variables() # Compute gradients. loss = losses[i] # if update_ops: # loss = control_flow_ops.with_dependencies(list(update_ops), loss) #print('------------',) try: gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops= colocate_gradients_with_ops) except Exception: # try: # gradients = opt.compute_gradients(loss) # except Exception: gradients = opt.get_updates(loss, params=variables) # TODO/FIXME: gradients may contain None entries, e.g. when another predictor is added to the graph: #[(None, <tf.Variable 'dual_bow/model_init/emb:0' shape=(29285, 256) dtype=float32_ref>), #(None, <tf.Variable 'dual_bow/main/dual_textsim/encode/text_mlp/linear/weights:0' shape=(256, 256) dtype=float32_ref>), #(<tensorflow.python.framework.ops.IndexedSlices object at 0x1b72ff50>, <tf.Variable 'seq2seq/model_init/emb:0' shape=(29285, 256) dtype=float32_ref>) #print('-------gradients1', gradients) #-- For now, hack: keep only the non-None gradients below. TODO: why are dual_bow variables 
# introduced when computing the gradient of this loss, when they seem unrelated to the seq2seq loss? gradients = [x for x in gradients if x[0] is not None] # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients( gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients( gradients, gradient_multipliers) # Optionally clip gradients by global norm. if clip_gradients is not None: gradients = _clip_gradients_by_norm( gradients, clip_gradients) #print('-------gradients', gradients) tower_grads.append(gradients) gradients = average_gradients(tower_grads) if "global_gradient_norm" in summaries or "gradient_norm" in summaries: summary.scalar("global_norm/gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Add histograms for variables, gradients and gradient norms. for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, grad_values) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if clip_gradients is not None and ( "global_gradient_norm" in summaries or "gradient_norm" in summaries): summary.scalar("global_norm/clipped_gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) else: loss = losses[0] ## Similar to calling opt.minimize directly, but the path below also applies gradient clipping and other processing; with the early return commented out AUC was 0.72739 vs. 0.72634 with it enabled. #return opt.minimize(losses[0], global_step=global_step if increment_global_step else None) # All trainable variables, if specific variables are not specified. if variables is None: variables = vars_.trainable_variables() # Compute gradients. try: gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops=colocate_gradients_with_ops) except Exception: # TODO: does not work for Keras optimizers gradients = opt.get_updates(loss=loss, params=variables) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients( gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers." ) if "global_gradient_norm" in summaries or "gradient_norm" in summaries: summary.scalar("global_norm/gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Optionally clip gradients by global norm. if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError("Unknown type %s for clip_gradients" % type(clip_gradients)) # # Add scalar summary for loss. # if "loss" in summaries: # summary.scalar("loss", loss) # Add histograms for variables, gradients and gradient norms. 
for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, grad_values) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if clip_gradients is not None and ( "global_gradient_norm" in summaries or "gradient_norm" in summaries): summary.scalar("global_norm/clipped_gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # IMPORTANT this is needed for momentum! if update_ops: grad_updates = [grad_updates] #print('--------------------1', grad_updates) grad_updates.extend(update_ops) #print('-------------------2', update_ops) grad_updates = tf.group(*grad_updates) #print('-----------grad updates', grad_updates) # # Make sure total_loss is valid. # final_loss = array_ops.check_numerics(loss, "Loss is inf or nan") # # Ensure the train_tensor computes grad_updates. # train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss) #return train_tensor return grad_updates
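# --- Usage sketch (added for illustration; not part of the original source). ---
# Driving the multi-tower optimize_loss above with one loss per GPU.
# `build_tower_loss` is a hypothetical helper that builds the model on the
# current device and returns a scalar loss; gradients are averaged across
# towers inside optimize_loss before being applied once.
import tensorflow as tf

num_gpus = 2
tower_losses = []
for i in range(num_gpus):
  with tf.device('/gpu:%d' % i):
    with tf.name_scope('tower_%d' % i):
      tower_losses.append(build_tower_loss(i))  # hypothetical helper

train_op = optimize_loss(
    losses=tower_losses,
    global_step=tf.train.get_or_create_global_step(),
    learning_rate=0.001,
    optimizer='Adam',
    num_gpus=num_gpus,
    clip_gradients=5.0)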
def set_model(self, model): """Sets Keras model and creates summary ops.""" self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients( model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads ] tf_summary.histogram( '{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = self._writer_class(self.log_dir, self.sess.graph) else: self.writer = self._writer_class(self.log_dir)
def logistic_regression(x, y, class_weight=None, init_mean=None, init_stddev=1.0): """Creates logistic regression TensorFlow subgraph. Args: x: tensor or placeholder for input features, shape should be [batch_size, n_features]. y: tensor or placeholder for labels (one-hot), shape should be [batch_size, n_classes]. class_weight: tensor, [n_classes], where for each class it has weight of the class. If not provided will check if graph contains tensor `class_weight:0`. If that is not provided either all ones are used. init_mean: the mean value to use for initialization. init_stddev: the standard deviation to use for initialization. Returns: Predictions and loss tensors. Side effects: The variables logistic_regression.weights and logistic_regression.bias are initialized as follows. If init_mean is not None, then initialization will be done using a random normal initializer with the given init_mean and init_stddev. (These may be set to 0.0 each if a zero initialization is desirable for convex use cases.) If init_mean is None, then the uniform_unit_scaling_initializer will be used. """ with vs.variable_scope('logistic_regression'): scope_name = vs.get_variable_scope().name summary.histogram('%s.x' % scope_name, x) summary.histogram('%s.y' % scope_name, y) dtype = x.dtype.base_dtype # Set up the requested initialization. if init_mean is None: weights = vs.get_variable( 'weights', [x.get_shape()[1], y.get_shape()[-1]], dtype=dtype) bias = vs.get_variable('bias', [y.get_shape()[-1]], dtype=dtype) else: weights = vs.get_variable( 'weights', [x.get_shape()[1], y.get_shape()[-1]], initializer=init_ops.random_normal_initializer( init_mean, init_stddev, dtype=dtype), dtype=dtype) bias = vs.get_variable( 'bias', [y.get_shape()[-1]], initializer=init_ops.random_normal_initializer( init_mean, init_stddev, dtype=dtype), dtype=dtype) summary.histogram('%s.weights' % scope_name, weights) summary.histogram('%s.bias' % scope_name, bias) # If no class weight provided, try to retrieve one from pre-defined # tensor name in the graph. if not class_weight: try: class_weight = ops.get_default_graph().get_tensor_by_name( 'class_weight:0') except KeyError: pass return losses_ops.softmax_classifier( x, y, weights, bias, class_weight=class_weight)
def set_model(self, model): self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: tf_summary.histogram(weight.name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = w_img.get_shape() if len(shape) > 1 and shape[0] > shape[1]: w_img = array_ops.transpose(w_img) if len(shape) == 1: w_img = array_ops.expand_dims(w_img, 0) w_img = array_ops.expand_dims(array_ops.expand_dims(w_img, 0), -1) tf_summary.image(weight.name, w_img) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf_summary.FileWriter(self.log_dir) if self.embeddings_freq: self.saver = saver_lib.Saver() embeddings_layer_names = self.embeddings_layer_names if not embeddings_layer_names: embeddings_layer_names = [ layer.name for layer in self.model.layers if type(layer).__name__ == 'Embedding' ] embeddings = { layer.name: layer.weights[0] for layer in self.model.layers if layer.name in embeddings_layer_names } embeddings_metadata = {} if not isinstance(self.embeddings_metadata, str): embeddings_metadata = self.embeddings_metadata else: embeddings_metadata = { layer_name: self.embeddings_metadata for layer_name in embeddings.keys() } config = projector.ProjectorConfig() self.embeddings_logs = [] for layer_name, tensor in embeddings.items(): embedding = config.embeddings.add() embedding.tensor_name = tensor.name self.embeddings_logs.append( os.path.join(self.log_dir, layer_name + '.ckpt')) if layer_name in embeddings_metadata: embedding.metadata_path = embeddings_metadata[layer_name] projector.visualize_embeddings(self.writer, config)
def logistic_regression(x, y, class_weight=None, init_mean=None, init_stddev=1.0): """Creates logistic regression TensorFlow subgraph. Args: x: tensor or placeholder for input features, shape should be [batch_size, n_features]. y: tensor or placeholder for labels (one-hot), shape should be [batch_size, n_classes]. class_weight: tensor, [n_classes], where for each class it has weight of the class. If not provided will check if graph contains tensor `class_weight:0`. If that is not provided either all ones are used. init_mean: the mean value to use for initialization. init_stddev: the standard deviation to use for initialization. Returns: Predictions and loss tensors. Side effects: The variables logistic_regression.weights and logistic_regression.bias are initialized as follows. If init_mean is not None, then initialization will be done using a random normal initializer with the given init_mean and init_stddev. (These may be set to 0.0 each if a zero initialization is desirable for convex use cases.) If init_mean is None, then the uniform_unit_scaling_initializer will be used. """ with vs.variable_scope('logistic_regression'): scope_name = vs.get_variable_scope().name summary.histogram('%s.x' % scope_name, x) summary.histogram('%s.y' % scope_name, y) dtype = x.dtype.base_dtype # Set up the requested initialization. if init_mean is None: weights = vs.get_variable( 'weights', [x.get_shape()[1], y.get_shape()[-1]], dtype=dtype) bias = vs.get_variable('bias', [y.get_shape()[-1]], dtype=dtype) else: weights = vs.get_variable( 'weights', [x.get_shape()[1], y.get_shape()[-1]], initializer=init_ops.random_normal_initializer(init_mean, init_stddev, dtype=dtype), dtype=dtype) bias = vs.get_variable( 'bias', [y.get_shape()[-1]], initializer=init_ops.random_normal_initializer(init_mean, init_stddev, dtype=dtype), dtype=dtype) summary.histogram('%s.weights' % scope_name, weights) summary.histogram('%s.bias' % scope_name, bias) # If no class weight provided, try to retrieve one from pre-defined # tensor name in the graph. if not class_weight: try: class_weight = ops.get_default_graph().get_tensor_by_name( 'class_weight:0') except KeyError: pass return losses_ops.softmax_classifier(x, y, weights, bias, class_weight=class_weight)
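# --- Usage sketch (added for illustration; not part of the original source). ---
# Minimal call of logistic_regression above; shapes and initializer values are
# illustrative. As the docstring states, it returns prediction and loss tensors.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10], name='features')
y = tf.placeholder(tf.float32, [None, 3], name='labels')  # one-hot labels
predictions, loss = logistic_regression(x, y, init_mean=0.0, init_stddev=0.01)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)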
def _add_layer_summary(value, tag): summary.scalar("%s/fraction_of_zero_values" % tag, nn.zero_fraction(value)) summary.histogram("%s/activation" % tag, value)
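# --- Usage sketch (added for illustration; not part of the original source). ---
# Typical call site for _add_layer_summary: one zero-fraction scalar and one
# activation histogram per hidden layer. The dense layer and its name are
# illustrative placeholders.
import tensorflow as tf

features = tf.placeholder(tf.float32, [None, 20])
net = tf.layers.dense(features, units=128, activation=tf.nn.relu,
                      name='hiddenlayer_0')
_add_layer_summary(net, 'hiddenlayer_0')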
def _make_histogram_ops(self, model): """Defines histogram ops when histogram_freq > 0.""" # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients( model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads ] tf_summary.histogram( '{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): if isinstance(layer.output, list): for i, output in enumerate(layer.output): tf_summary.histogram( '{}_out_{}'.format(layer.name, i), output) else: tf_summary.histogram('{}_out'.format(layer.name), layer.output)
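# --- Usage sketch (added for illustration; not part of the original source). ---
# A Keras TensorBoard callback configured so that histogram ops like the ones
# built in _make_histogram_ops above are created: histogram_freq > 0 adds
# weight/gradient/output histograms and write_images adds kernel images.
# `model`, `x_train`, `y_train`, `x_val` and `y_val` are placeholders.
from tensorflow import keras

tb = keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=1,
    write_graph=True,
    write_grads=True,
    write_images=True)
model.fit(x_train, y_train,
          epochs=5,
          validation_data=(x_val, y_val),  # needed for histogram summaries
          callbacks=[tb])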