def _build_network(features, mode, params):
    """Builds the network for different values of params['use_bfloat16'].

    Args:
      features: model inputs, forwarded unchanged to `multi_scale_logits`.
      mode: a `tf.estimator.ModeKeys` value. The network is built in training
        mode only when it equals `tf.estimator.ModeKeys.TRAIN`.
      params: dict read for the keys 'use_bfloat16', 'model_options',
        'image_pyramid', 'fine_tune_batch_norm' (TRAIN only) and
        'weight_decay' (float32 path only).

    Returns:
      The nested mapping returned by `multi_scale_logits`
      (output level -> scale -> logits). On the bfloat16 path every logits
      tensor in that mapping is replaced by its tf.float32 cast.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    # Batch norm is never fine-tuned outside of training.
    fine_tune_batch_norm = (
        params['fine_tune_batch_norm'] if is_training else False)

    if not params['use_bfloat16']:
        return multi_scale_logits(
            features,
            params['model_options'],
            params['image_pyramid'],
            weight_decay=params['weight_decay'],
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)

    with bfloat16.bfloat16_scope():
        # NOTE(review): this path passes weight_decay=0.0 instead of
        # params['weight_decay']; kept as in the original -- confirm intent.
        outputs_to_scales_to_logits = multi_scale_logits(
            features,
            params['model_options'],
            params['image_pyramid'],
            weight_decay=0.0,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)

    # `dict.iteritems()` only exists on Python 2; `values()`/`items()` work on
    # both major versions. The inner items are snapshotted with `list` because
    # the dict is updated while looping.
    for scales_to_logits in outputs_to_scales_to_logits.values():
        for scale, logits in list(scales_to_logits.items()):
            scales_to_logits[scale] = tf.cast(logits, tf.float32)
    return outputs_to_scales_to_logits
def build_network():
    """Builds Inception v3 at the precision selected by `FLAGS.precision`.

    Takes no arguments: `features`, `num_classes` and `is_training` are read
    from the surrounding scope (presumably an enclosing model_fn -- verify
    against the caller).

    Returns:
      A `(logits, end_points)` tuple as returned by `inception.inception_v3`.
      For 'bfloat16' the logits are cast to tf.float32 before returning.

    Raises:
      ValueError: if `FLAGS.precision` is neither 'bfloat16' nor 'float32'.
        Previously this case fell through to the `return` with both names
        unbound and surfaced as an UnboundLocalError.
    """
    precision = FLAGS.precision
    if precision not in ('bfloat16', 'float32'):
        raise ValueError(
            "Unsupported precision %r; expected 'bfloat16' or 'float32'." %
            (precision,))

    if precision == 'bfloat16':
        with bfloat16.bfloat16_scope():
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)
        logits = tf.cast(logits, tf.float32)
    else:
        logits, end_points = inception.inception_v3(
            features, num_classes, is_training=is_training)
    return logits, end_points
def testRequestedDType(self):
    """Checks that a dtype requested via `get_variable` is honored.

    Inside `bfloat16_scope`, a variable created without a dtype must report
    float32 and one created with `dtype=dtypes.bfloat16` must report bfloat16,
    while both variables listed by the scope itself report float32.
    """
    with bfloat16.bfloat16_scope() as scope:
        default_var = variable_scope.get_variable("v1", [])
        self.assertEqual(default_var.dtype.base_dtype, dtypes.float32)

        requested_var = variable_scope.get_variable(
            "v2", [], dtype=dtypes.bfloat16)
        self.assertEqual(requested_var.dtype.base_dtype, dtypes.bfloat16)

        stored_dtypes = []
        for var in scope.global_variables():
            stored_dtypes.append(var.dtype.base_dtype)
        expected_dtypes = [dtypes.float32, dtypes.float32]
        self.assertEqual(expected_dtypes, stored_dtypes)
def _model_fn(images, source_id, raw_shape, params, model):
    """Model definition for evaluating the SSD model based on ResNet-50.

    Runs `model` with batch-norm training disabled, decodes its box outputs
    against the default anchor boxes, keeps the top-scoring candidates and
    applies non-max suppression.

    Args:
      images: the input image tensor with shape [batch_size, height, width, 3].
        The height and width are fixed and equal.
      source_id: a Tensor with shape [batch_size].
      raw_shape: a Tensor with shape [batch_size, 3].
      params: the dictionary defining the hyperparameters of the model. Only
        'use_bfloat16' is read here; the dict is also forwarded to `model`.
      model: the SSD model; called as
        `model(images, params, is_training_bn=False)` and expected to return
        `(cls_outputs, box_outputs)`, two dicts keyed by level.

    Returns:
      The value returned by `non_max_suppression` for the selected scores and
      decoded boxes. (The previous docstring described an EstimatorSpec, which
      this function does not build.)
    """

    def _model_outputs():
        return model(images, params, is_training_bn=False)

    if params['use_bfloat16']:
        with bfloat16.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            # Snapshot the keys: the dicts are updated while looping.
            for level in list(cls_outputs.keys()):
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()

    flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)

    # Reorder the box coordinates from (y_min, x_min, y_max, x_max) to
    # (x_min, y_min, x_max, y_max) along axis 1.
    y_min, x_min, y_max, x_max = tf.split(flattened_box, 4, axis=1)
    flattened_box = tf.concat([x_min, y_min, x_max, y_max], axis=1)
    # [batch_size, 4, N] to [batch_size, N, 4]
    flattened_box = tf.transpose(flattened_box, [0, 2, 1])

    anchors = tf.convert_to_tensor(DefaultBoxes()('ltrb'))
    decoded_boxes = decode_boxes(
        encoded_boxes=flattened_box,
        anchors=anchors,
        weights=ssd_constants.BOX_CODER_SCALES)

    pred_scores = tf.nn.softmax(flattened_cls, axis=1)
    pred_scores, indices = select_top_k_scores(
        pred_scores, ssd_constants.MAX_NUM_EVAL_BOXES)

    return non_max_suppression(
        scores_in=pred_scores,
        boxes_in=decoded_boxes,
        top_k_indices=indices,
        source_id=source_id,
        raw_shape=raw_shape)
def model_fn(features, labels, mode, params):
    """Builds the TPUEstimatorSpec for the Squeezenet model.

    The network is always built under `bfloat16_scope` and its logits are cast
    to float32 before the loss is computed. The loss, the learning-rate
    schedule and the train op are created for every `mode`.

    Args:
      features: inputs passed to `squeezenet`.
      labels: sparse class labels for the cross-entropy loss.
      mode: a `tf.estimator.ModeKeys` value.
      params: dict read for 'num_classes', 'num_shards', 'batch_size',
        'num_epochs', 'lr', 'min_lr', 'optimizer', 'use_tpu' and, for the
        rmsprop and momentum optimizers, 'momentum'.

    Returns:
      A `tpu_estimator.TPUEstimatorSpec` carrying the loss, train op, eval
      metrics and the 'classes' / 'probabilities' predictions.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN
    with bfloat16.bfloat16_scope():
        raw_logits = squeezenet(
            features, is_training=training, num_classes=params["num_classes"])
    logits = tf.cast(raw_logits, tf.float32)

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        logits=logits, labels=labels)
    loss = tf.reduce_mean(cross_entropy)

    # Linear (power=1.0) decay from params["lr"] down to params["min_lr"].
    total_batch = params["num_shards"] * params["batch_size"]
    decay_steps = 1300 * 1000 * params["num_epochs"] // total_batch
    initial_lr = params["lr"]
    learning_rate = tf.train.polynomial_decay(
        initial_lr,
        global_step=tf.train.get_or_create_global_step(),
        end_learning_rate=params["min_lr"],
        decay_steps=decay_steps,
        power=1.0,
        cycle=False)

    # TODO(power): Hack copied from resnet: remove when summaries are working.
    lr_row = tf.expand_dims(learning_rate, 0)
    lr_tiled = tf.tile(lr_row, [params["batch_size"]])
    lr_repeat = tf.reshape(lr_tiled, [params["batch_size"], 1])

    if params["optimizer"] == "adam":
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    elif params["optimizer"] == "rmsprop":
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=learning_rate,
            momentum=params["momentum"],
            epsilon=1.0)
    else:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=params["momentum"],
            use_nesterov=True)

    if params["use_tpu"]:
        optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    train_op = optimizer.minimize(loss, tf.train.get_global_step())

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    metric_inputs = [labels, logits, lr_repeat]
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metrics=(metric_fn, metric_inputs),
        predictions=predictions,
    )
def unet_separator(features, labels, mode, params):
    """Estimator model function for the conditional U-Net audio separator.

    Builds the separator under `bfloat16_scope` and returns a
    `TPUEstimatorSpec` for PREDICT, EVAL or TRAIN.

    Args:
      features: dict read for 'mix' and 'labels' (the conditioning input) and,
        in PREDICT mode, 'filename' and 'sample_id'.
      labels: the ground-truth sources. Outside PREDICT mode they are
        zero-padded with `[[0, 0], [0, 0], [2, 3], [0, 0]]` before the loss.
      mode: a `tf.estimator.ModeKeys` value.
      params: configuration dict; used throughout under the name
        `model_config`.

    Returns:
      A `tpu_estimator.TPUEstimatorSpec`. PREDICT returns predictions only;
      EVAL returns the loss, the host call and an 'mse' metric; TRAIN returns
      the loss, the host call and the train op. Any other mode falls off the
      end of the function and returns None.
    """

    # Define host call function
    def host_call_fn(gs, loss, lr, mix=None, gt_sources=None, est_sources=None):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the
        second element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          mix: `Tensor` with shape `[batch, mix_samples, 1]`.
          gt_sources: `Tensor` with shape
            `[batch, sources_n, output_samples, 1]`.
          est_sources: `Tensor` with shape
            `[batch, sources_n, output_samples, 1]`.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        # Summaries are written under <model_base_dir>/<experiment_id>.
        with summary.create_file_writer(
                model_config["model_base_dir"] + os.path.sep +
                str(model_config["experiment_id"])).as_default():
            with summary.always_record_summaries():
                summary.scalar('loss', loss[0], step=gs)
                summary.scalar('learning_rate', lr[0], step=gs)
            # NOTE(review): `gs` is sliced from a host-call tensor argument, so
            # this is a Python-level test on a tensor expression -- confirm it
            # gates the audio summaries as intended. When
            # `write_audio_summaries` is off, the caller passes `tf.zeros((1))`
            # placeholders for mix/gt_sources/est_sources.
            if gs % 10000 == 0:
                with summary.record_summaries_every_n_global_steps(
                        model_config["audio_summaries_every_n_steps"]):
                    summary.audio('mix',
                                  mix,
                                  model_config['expected_sr'],
                                  max_outputs=model_config["num_sources"])
                    # One ground-truth and one estimate summary per source.
                    for source_id in range(gt_sources.shape[1].value):
                        summary.audio('gt_sources_{source_id}'.format(
                            source_id=source_id),
                                      gt_sources[:, source_id, :, :],
                                      model_config['expected_sr'],
                                      max_outputs=model_config["num_sources"])
                        summary.audio('est_sources_{source_id}'.format(
                            source_id=source_id),
                                      est_sources[:, source_id, :, :],
                                      model_config['expected_sr'],
                                      max_outputs=model_config["num_sources"])
            return summary.all_summary_ops()

    mix = features['mix']
    conditioning = features['labels']
    sources = labels
    model_config = params
    disc_input_shape = [
        model_config["batch_size"], model_config["num_frames"], 0
    ]

    with bfloat16.bfloat16_scope():
        separator_class = Models.ConditionalUnetAudioSeparator.UnetAudioSeparator(
            model_config["num_layers"],
            model_config["num_initial_filters"],
            output_type=model_config["output_type"],
            context=model_config["context"],
            mono=model_config["mono_downmix"],
            upsampling=model_config["upsampling"],
            num_sources=model_config["num_sources"],
            filter_size=model_config["filter_size"],
            merge_filter_size=model_config["merge_filter_size"])

        sep_input_shape, sep_output_shape = separator_class.get_padding(
            np.array(disc_input_shape))

        # Input context that the input audio has to be padded ON EACH SIDE
        # TODO move this to dataset function
        # The mix must already have the input length the separator expects.
        assert mix.shape[1].value == sep_input_shape[1]
        if mode != tf.estimator.ModeKeys.PREDICT:
            # Zero-pad the third axis of the targets by 2 before and 3 after.
            pad_tensor = tf.constant([[0, 0], [0, 0], [2, 3], [0, 0]])
            sources = tf.pad(sources, pad_tensor, "CONSTANT")

        separator_func = separator_class.get_output

        # Compute loss.
        # The separator outputs are stacked along a new axis 1.
        separator_sources = tf.stack(separator_func(
            mix,
            conditioning,
            True,
            not model_config["raw_audio_loss"],
            reuse=False),
                                     axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'mix': mix,
            'sources': separator_sources,
            'filename': features['filename'],
            'sample_id': features['sample_id']
        }
        return tpu_estimator.TPUEstimatorSpec(mode, predictions=predictions)

    # Sum of squared errors between targets and estimates, as float32.
    separator_loss = tf.cast(
        tf.reduce_sum(tf.squared_difference(sources, separator_sources)),
        tf.float32)

    if mode != tf.estimator.ModeKeys.PREDICT:
        global_step = tf.train.get_global_step()
        sep_lr = tf.train.exponential_decay(model_config['init_sup_sep_lr'],
                                            global_step,
                                            model_config['decay_steps'],
                                            model_config['decay_rate'],
                                            staircase=False,
                                            name=None)

        # Scalars are reshaped to [1] before being handed to the host call.
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(separator_loss, [1])
        lr_t = tf.reshape(sep_lr, [1])

        if model_config["write_audio_summaries"]:
            host_call = (host_call_fn,
                         [gs_t, loss_t, lr_t, mix, sources, separator_sources])
        else:
            # Placeholders keep the host-call argument list the same length.
            host_call = (host_call_fn, [
                gs_t, loss_t, lr_t,
                tf.zeros((1)),
                tf.zeros((1)),
                tf.zeros((1))
            ])

    # Creating evaluation estimator
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, predictions):
            """Returns the mean squared error metric under the key 'mse'."""
            mean_mse_loss = tf.metrics.mean_squared_error(labels, predictions)
            return {'mse': mean_mse_loss}

        eval_params = {'labels': sources, 'predictions': separator_sources}
        return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                              loss=separator_loss,
                                              host_call=host_call,
                                              eval_metrics=(metric_fn,
                                                            eval_params))

    # Create training op.
    # TODO add learning rate schedule
    # TODO add early stopping
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Only the variables returned for "separator" are optimized.
        separator_vars = Utils.getTrainableVariables("separator")
        print("Sep_Vars: " + str(Utils.getNumParams(separator_vars)))
        print("Num of variables: " + str(len(tf.global_variables())))

        separator_solver = tf.train.AdamOptimizer(learning_rate=sep_lr)
        if model_config["use_tpu"]:
            separator_solver = tpu_optimizer.CrossShardOptimizer(
                separator_solver)

        train_op = separator_solver.minimize(separator_loss,
                                             var_list=separator_vars,
                                             global_step=global_step)
        return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                              loss=separator_loss,
                                              host_call=host_call,
                                              train_op=train_op)
def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

    Args:
      features: `Tensor` of batched images, or a dict holding that tensor
        under the key 'feature'.
      labels: `Tensor` of labels for the data samples.
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`.
      params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `TPUEstimatorSpec` for the model.

    Raises:
      ValueError: if `FLAGS.precision` is neither 'bfloat16' nor 'float32'.
    """
    if isinstance(features, dict):
        features = features['feature']

    channels_first = FLAGS.data_format == 'channels_first'
    if channels_first:
        assert not FLAGS.transpose_input  # channels_first only for GPU

    if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance. This has to happen
    # while the channel axis is still last: the [1, 1, 3] constants broadcast
    # against the trailing axes and cannot line up with the channel axis once
    # the tensor is NCHW.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    if channels_first:
        features = tf.transpose(features, [0, 3, 1, 2])  # NHWC to NCHW

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        network = resnet_model.resnet_v1(
            resnet_depth=FLAGS.resnet_depth,
            num_classes=FLAGS.num_label_classes,
            data_format=FLAGS.data_format)
        return network(
            inputs=features,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    if FLAGS.precision == 'bfloat16':
        with bfloat16.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif FLAGS.precision == 'float32':
        logits = build_network()
    else:
        # Without this branch `logits` stayed unbound and the failure only
        # showed up later as an UnboundLocalError.
        raise ValueError(
            "Unsupported precision %r; expected 'bfloat16' or 'float32'." %
            (FLAGS.precision,))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2
    # regularization.
    one_hot_labels = tf.one_hot(labels, FLAGS.num_label_classes)
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=one_hot_labels)

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from
        # global_step.
        global_step = tf.train.get_global_step()
        batches_per_epoch = FLAGS.num_train_images / FLAGS.train_batch_size
        current_epoch = (tf.cast(global_step, tf.float32) / batches_per_epoch)
        learning_rate = learning_rate_schedule(current_epoch)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=FLAGS.momentum,
            use_nesterov=True)
        if FLAGS.use_tpu:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To
            # the user, this should look like regular synchronous training.
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency
        # to the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor` with shape `[batch]` for the global_step.
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)
                        return summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for
            # Tensorboard, the summary op needs to be run on the host CPU via
            # host_call. host_call expects [batch_size, ...] Tensors, thus
            # reshape to introduce a batch dimension. These Tensors are
            # implicitly concatenated to [params['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])
            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        host_call=host_call,
        eval_metrics=eval_metrics)
def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet v2 (always bfloat16) for use with TPUEstimator.

    Args:
      features: `Tensor` of batched images, or a dict holding that tensor
        under the key 'feature'.
      labels: `Tensor` of labels for the data samples.
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`.
      params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `TPUEstimatorSpec` for the model.
    """

    def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the
        second element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch, ]` for the global_step.
          loss: `Tensor` with shape `[batch, ]` for the training loss.
          lr: `Tensor` with shape `[batch, ]` for the learning_rate.
          ce: `Tensor` with shape `[batch, ]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        # Outfeed supports int32 but global_step is expected to be int64.
        gs = tf.cast(tf.reduce_mean(gs), tf.int64)
        with summary.create_file_writer(FLAGS.model_dir).as_default():
            with summary.always_record_summaries():
                summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                summary.scalar('learning_rate', tf.reduce_mean(lr), step=gs)
                summary.scalar('current_epoch', tf.reduce_mean(ce), step=gs)
                return summary.all_summary_ops()

    def metric_fn(labels, logits):
        """Evaluation metric function. Evaluates accuracy.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `eval_metrics`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the
        second element in the tuple passed to `eval_metrics`.

        Args:
          labels: `Tensor` with shape `[batch, ]`.
          logits: `Tensor` with shape `[batch, num_classes]`.

        Returns:
          A dict of the metrics to return from evaluation.
        """
        predictions = tf.argmax(logits, axis=1)
        top_1_accuracy = tf.metrics.accuracy(labels, predictions)
        in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
        top_5_accuracy = tf.metrics.mean(in_top_5)
        return {
            'top_1_accuracy': top_1_accuracy,
            'top_5_accuracy': top_5_accuracy,
        }

    if isinstance(features, dict):
        features = features['feature']

    # In most cases, the default data format NCHW instead of NHWC should be
    # used for a significant performance boost on GPU/TPU. NHWC should be used
    # only if the network needs to be run on CPU since the pooling operations
    # are only supported on NHWC.
    if FLAGS.data_format == 'channels_first':
        features = tf.transpose(features, [0, 3, 1, 2])

    if FLAGS.use_transpose:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    training = mode == tf.estimator.ModeKeys.TRAIN
    with bfloat16.bfloat16_scope():
        # NOTE(review): data_format is not forwarded to resnet_v2 even though
        # the features may have been transposed to channels_first above --
        # confirm the network default matches.
        network = resnet_model_v2.resnet_v2(
            resnet_size=FLAGS.resnet_depth,
            num_classes=LABEL_CLASSES,
        )
        logits = network(inputs=features, is_training=training)
    logits = tf.cast(logits, tf.float32)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        export_outputs = {
            'classify': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Loss = softmax cross entropy + L2 regularization.
    one_hot_labels = tf.one_hot(labels, LABEL_CLASSES)
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=one_hot_labels)

    # Weight decay skips the batch-normalization variables.
    l2_terms = []
    for var in tf.trainable_variables():
        if 'batch_normalization' in var.name:
            continue
        l2_terms.append(tf.nn.l2_loss(var))
    loss = cross_entropy + WEIGHT_DECAY * tf.add_n(l2_terms)

    host_call = None
    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Derive the current epoch and its learning rate from global_step.
        global_step = tf.train.get_global_step()
        steps_per_epoch = NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)
        learning_rate = learning_rate_schedule(current_epoch)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=MOMENTUM,
            use_nesterov=True)
        if FLAGS.use_tpu:
            # On TPU the optimizer is wrapped with CrossShardOptimizer, which
            # handles synchronization details between different TPU cores. To
            # the user, this should look like regular synchronous training.
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency
        # to the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        # To log the loss, current learning rate, and epoch for Tensorboard,
        # the summary op needs to be run on the host CPU via host_call.
        # host_call expects [batch_size, ...] Tensors, thus reshape to
        # introduce a batch dimension. These Tensors are implicitly broadcasted
        # to [params['batch_size'], ].
        step_as_int32 = tf.cast(global_step, tf.int32)
        gs_t = tf.reshape(step_as_int32, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if FLAGS.enable_hostcall:
            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        host_call=host_call,
        eval_metrics=eval_metrics)
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition for the RetinaNet model based on ResNet.

    Args:
      features: the input image tensor with shape [batch_size, height, width,
        3]. The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the RetinaNet model outputs class logits and box regression
        outputs.
      variable_filter_fn: the filter function that takes trainable_variables
        and returns the variable list after applying the filter rule.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise the
      `TPUEstimatorSpec` to run training or evaluation.
    """

    def _model_outputs():
        return model(
            features,
            min_level=params['min_level'],
            max_level=params['max_level'],
            num_classes=params['num_classes'],
            num_anchors=len(params['aspect_ratios'] * params['num_scales']),
            resnet_depth=params['resnet_depth'],
            is_training_bn=params['is_training_bn'])

    def _load_resnet_checkpoint():
        """Loads pretrained model through scaffold function."""
        tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
            '/': 'resnet%s/' % params['resnet_depth'],
        })
        return tf.train.Scaffold()

    def _repeat_over_batch(scalar, repeats):
        """Tiles a scalar tensor into a [repeats, 1] tensor."""
        tiled = tf.tile(tf.expand_dims(scalar, 0), [repeats])
        return tf.reshape(tiled, [repeats, 1])

    if params['use_bfloat16']:
        with bfloat16.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            levels = cls_outputs.keys()
            # Hand float32 tensors to everything downstream of the model.
            for level in levels:
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()
        levels = cls_outputs.keys()

    # PREDICT mode returns the raw per-level outputs next to the input image.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'image': features}
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # The pretrained checkpoint is only loaded when training.
    if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:
        scaffold_fn = _load_resnet_checkpoint
    else:
        scaffold_fn = None

    # Set up training loss and learning rate.
    global_step = tf.train.get_global_step()
    learning_rate = _learning_rate_schedule(
        params['learning_rate'], params['lr_warmup_init'],
        params['lr_warmup_step'], params['lr_drop_step'], global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    total_loss, cls_loss, box_loss = _detection_loss(
        cls_outputs, box_outputs, labels, params)

    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate, momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if variable_filter_fn:
            var_list = variable_filter_fn(
                tf.trainable_variables(), params['resnet_depth'])
        else:
            var_list = None
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(
                total_loss, global_step, var_list=var_list)

    # Evaluation only works on GPU/CPU host and batch_size=1
    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            eval_anchors = anchors.Anchors(
                params['min_level'], params['max_level'],
                params['num_scales'], params['aspect_ratios'],
                params['anchor_scale'], params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(
                eval_anchors, params['num_classes'])
            mean_cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            mean_box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

            # Rebuild the per-level output dicts from the flat kwargs.
            level_range = range(params['min_level'], params['max_level'] + 1)
            level_cls = {
                lvl: kwargs['cls_outputs_%d' % lvl] for lvl in level_range
            }
            level_box = {
                lvl: kwargs['box_outputs_%d' % lvl] for lvl in level_range
            }
            detections = anchor_labeler.generate_detections(
                level_cls, level_box, kwargs['source_ids'])

            eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
            coco_metrics = eval_metric.estimator_metric_fn(
                detections, kwargs['image_scales'])

            # Both loss means are reported next to the COCO metrics.
            output_metrics = {
                'cls_loss': mean_cls_loss,
                'box_loss': mean_box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        batch_size = params['batch_size']
        metric_fn_inputs = {
            'cls_loss_repeat': _repeat_over_batch(cls_loss, batch_size),
            'box_loss_repeat': _repeat_over_batch(box_loss, batch_size),
            'source_ids': labels['source_ids'],
            'image_scales': labels['image_scales'],
        }
        for level in range(params['min_level'], params['max_level'] + 1):
            metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
            metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
        eval_metrics = (metric_fn, metric_fn_inputs)

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

    Args:
      features: `Tensor` of batched images, or a dict holding that tensor
        under the key 'feature'.
      labels: `Tensor` of labels for the data samples.
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`.
      params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `TPUEstimatorSpec` for the model.

    Raises:
      ValueError: if `FLAGS.precision` is neither 'bfloat16' nor 'float32'.
    """
    if isinstance(features, dict):
        features = features['feature']

    channels_first = FLAGS.data_format == 'channels_first'
    if channels_first:
        assert not FLAGS.transpose_input  # channels_first only for GPU

    if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance. This has to happen
    # while the channel axis is still last: the [1, 1, 3] constants broadcast
    # against the trailing axes and cannot line up with the channel axis once
    # the tensor is NCHW.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    if channels_first:
        features = tf.transpose(features, [0, 3, 1, 2])  # NHWC to NCHW

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        network = resnet_model.resnet_v1(
            resnet_depth=FLAGS.resnet_depth,
            num_classes=LABEL_CLASSES,
            data_format=FLAGS.data_format)
        return network(
            inputs=features,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    if FLAGS.precision == 'bfloat16':
        with bfloat16.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif FLAGS.precision == 'float32':
        logits = build_network()
    else:
        # Without this branch `logits` stayed unbound and the failure only
        # showed up later as an UnboundLocalError.
        raise ValueError(
            "Unsupported precision %r; expected 'bfloat16' or 'float32'." %
            (FLAGS.precision,))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2
    # regularization.
    one_hot_labels = tf.one_hot(labels, LABEL_CLASSES)
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=one_hot_labels)

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from
        # global_step.
        global_step = tf.train.get_global_step()
        batches_per_epoch = NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        current_epoch = (tf.cast(global_step, tf.float32) / batches_per_epoch)
        learning_rate = learning_rate_schedule(current_epoch)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=FLAGS.momentum,
            use_nesterov=True)
        if FLAGS.use_tpu:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To
            # the user, this should look like regular synchronous training.
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency
        # to the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor` with shape `[batch]` for the global_step.
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)
                        return summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for
            # Tensorboard, the summary op needs to be run on the host CPU via
            # host_call. host_call expects [batch_size, ...] Tensors, thus
            # reshape to introduce a batch dimension. These Tensors are
            # implicitly concatenated to [params['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])
            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        host_call=host_call,
        eval_metrics=eval_metrics)
def _model_fn(features, labels, mode, params, model):
  """Model definition for the SSD model based on ResNet-50.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal. In PREDICT mode this is a
      dictionary instead; the image is taken from its 'image' entry and the
      remaining entries are treated as labels.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the SSD model outputs class logits and box regression outputs.

  Returns:
    spec: the EstimatorSpec or TPUEstimatorSpec to run training, evaluation,
      or prediction.

  Raises:
    NotImplementedError: if `mode` is EVAL.
  """
  if mode == tf.estimator.ModeKeys.PREDICT:
    # In PREDICT mode everything arrives in `features`: split the image out
    # and keep the rest as `labels` so it can be echoed in the predictions.
    labels = features
    features = labels.pop('image')

  # Manually apply the double transpose trick for training data.
  if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
    features = tf.transpose(features, [3, 0, 1, 2])
    labels[ssd_constants.BOXES] = tf.transpose(
        labels[ssd_constants.BOXES], [2, 0, 1])
    labels[ssd_constants.CLASSES] = tf.transpose(
        labels[ssd_constants.CLASSES], [2, 0, 1])

  # Normalize the image to zero mean and unit variance.
  mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_MEAN,
                       value=ssd_constants.NORMALIZATION_MEAN)
  mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_STD,
                       value=ssd_constants.NORMALIZATION_STD)
  features -= tf.constant(ssd_constants.NORMALIZATION_MEAN,
                          shape=[1, 1, 3], dtype=features.dtype)
  features /= tf.constant(ssd_constants.NORMALIZATION_STD,
                          shape=[1, 1, 3], dtype=features.dtype)

  def _model_outputs():
    return model(features, params,
                 is_training_bn=(mode == tf.estimator.ModeKeys.TRAIN))

  if params['use_bfloat16']:
    with bfloat16.bfloat16_scope():
      cls_outputs, box_outputs = _model_outputs()
      levels = cls_outputs.keys()
      # Cast every level back to float32 for the loss / decoding below.
      for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
  else:
    cls_outputs, box_outputs = _model_outputs()
    levels = cls_outputs.keys()

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)
    mlperf_log.ssd_print(key=mlperf_log.SCALES,
                         value=ssd_constants.BOX_CODER_SCALES)
    ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)

    anchors = box_list.BoxList(
        tf.convert_to_tensor(dataloader.DefaultBoxes()('ltrb')))

    decoded_boxes = box_coder.batch_decode(
        encoded_boxes=flattened_box, box_coder=ssd_box_coder, anchors=anchors)

    pred_scores = tf.nn.softmax(flattened_cls, axis=2)

    pred_scores, indices = select_top_k_scores(
        pred_scores, ssd_constants.MAX_NUM_EVAL_BOXES)

    predictions = dict(
        labels,
        indices=indices,
        pred_scores=pred_scores,
        pred_box=decoded_boxes,
    )

    if params['visualize_dataloader']:
      # this is for inference visualization.
      predictions['image'] = features

    if params['use_tpu']:
      return tpu_estimator.TPUEstimatorSpec(mode=mode, predictions=predictions)

    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Load pretrained model from checkpoint.
  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
          '/': 'resnet%s/' % ssd_constants.RESNET_DEPTH,
      })
      return tf.train.Scaffold()
  else:
    scaffold_fn = None

  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)
  mlperf_log.ssd_print(key=mlperf_log.OPT_LR, deferred=True)
  # cls_loss and box_loss are for logging. only total_loss is optimized.
  total_loss, cls_loss, box_loss = detection_loss(
      cls_outputs, box_outputs, labels)

  total_loss += params['weight_decay'] * tf.add_n(
      [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

  host_call = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=ssd_constants.MOMENTUM)
    if params['use_tpu']:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)
    mlperf_log.ssd_print(key=mlperf_log.OPT_NAME,
                         value='tf.train.MomentumOptimizer')
    # TODO(wangtao): figure out how to log learning rate.
    # mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=learning_rate)
    mlperf_log.ssd_print(key=mlperf_log.OPT_MOMENTUM,
                         value=ssd_constants.MOMENTUM)
    mlperf_log.ssd_print(key=mlperf_log.OPT_WEIGHT_DECAY,
                         value=params['weight_decay'])

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if params['device'] == 'gpu':
      # GPU uses tf.group to avoid dependency overhead on update_ops; also,
      # multi-GPU requires a different EstimatorSpec class object
      train_op = tf.group(optimizer.minimize(total_loss, global_step),
                          update_ops)
      # scaffold_fn is None when no ResNet checkpoint is configured; only
      # call it when it exists instead of failing on `None()`.
      scaffold = scaffold_fn() if scaffold_fn is not None else None
      return model_fn_lib.EstimatorSpec(
          mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(total_loss, global_step)

    if params['use_host_call']:

      def host_call_fn(global_step, total_loss, cls_loss, box_loss,
                       learning_rate):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          global_step: `Tensor with shape `[batch, ]` for the global_step.
          total_loss: `Tensor` with shape `[batch, ]` for the training loss.
          cls_loss: `Tensor` with shape `[batch, ]` for the training cls loss.
          box_loss: `Tensor` with shape `[batch, ]` for the training box loss.
          learning_rate: `Tensor` with shape `[batch, ]` for the learning_rate.

        Returns:
          List of summary ops to run on the CPU host.
        """
        # Outfeed supports int32 but global_step is expected to be int64.
        global_step = tf.reduce_mean(global_step)
        # Host call fns are executed FLAGS.iterations_per_loop times after one
        # TPU loop is finished, setting max_queue value to the same as number of
        # iterations will make the summary writer only flush the data to storage
        # once per loop.
        with (tf.contrib.summary.create_file_writer(
            params['model_dir'],
            max_queue=params['iterations_per_loop']).as_default()):
          with tf.contrib.summary.always_record_summaries():
            tf.contrib.summary.scalar(
                'total_loss', tf.reduce_mean(total_loss), step=global_step)
            tf.contrib.summary.scalar(
                'cls_loss', tf.reduce_mean(cls_loss), step=global_step)
            tf.contrib.summary.scalar(
                'box_loss', tf.reduce_mean(box_loss), step=global_step)
            tf.contrib.summary.scalar(
                'learning_rate', tf.reduce_mean(learning_rate),
                step=global_step)

            return tf.contrib.summary.all_summary_ops()

      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [params['batch_size']].
      global_step_t = tf.reshape(global_step, [1])
      total_loss_t = tf.reshape(total_loss, [1])
      cls_loss_t = tf.reshape(cls_loss, [1])
      box_loss_t = tf.reshape(box_loss, [1])
      learning_rate_t = tf.reshape(learning_rate, [1])
      host_call = (host_call_fn, [
          global_step_t, total_loss_t, cls_loss_t, box_loss_t, learning_rate_t
      ])
  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:
    # Evaluation through the estimator is not supported by this model_fn.
    raise NotImplementedError

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)
def testScopeName(self):
  """Checks that the bfloat16 variable scope is named "bfloat16"."""
  with bfloat16.bfloat16_scope() as scope:
    self.assertEqual(scope.name, "bfloat16")
def model_fn(features, labels, mode, params):
  """The model_fn for the dontbeturtle model to be used with TPUEstimator.

  Args:
    features: `Tensor` of batched input images, or a dict holding that tensor
      under the 'feature' key. The original notes describe it as
      <batchNum x M x M x 3>.
    labels: label heatmaps that the predicted heatmaps are compared against.
      The original notes describe one heatmap channel each for head, neck,
      right shoulder and left shoulder (<batchNum x N x N x 4>) -- TODO
      confirm against the input pipeline.
    mode: one of `tf.estimator.ModeKeys`:
      TRAIN for weight training (forward + backward + metric),
      EVAL for validation (forward + metric),
      PREDICT for prediction (forward only; no dedicated PREDICT spec is
      built here yet, see the TODO in the body).
    params: unused.

  Returns:
    A `TPUEstimatorSpec` when `FLAGS.use_tpu` is set, otherwise a
    `tf.estimator.EstimatorSpec`.
  """
  del params  # unused
  if isinstance(features, dict):
    features = features['feature']

  if FLAGS.data_format == 'channels_first':
    assert not FLAGS.transpose_input  # channels_first only for GPU
    features = tf.transpose(features, [0, 3, 1, 2])

  if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
    features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

  with tf.name_scope(name='feature_norm', values=[features]):
    # Standardization to the image by zero mean and unit variance.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    # set input_shape
    features.set_shape(features.get_shape().merge_with(
        tf.TensorShape([
            None, model_config.input_height, model_config.input_width, None
        ])))

  # Model building ============================
  # This nested function allows us to avoid duplicating the logic which
  # builds the network, for different values of --precision.
  def build_network():
    """Builds the network and logs the shapes of its inputs and outputs."""
    with tf.name_scope(name='build_network'):
      # Specify is_trainable on the sub-configs BEFORE the model is built.
      # Setting the flags only after get_model() returned would leave this
      # graph built with whatever values a previous call left behind
      # (assumes get_model reads them at build time -- TODO confirm).
      if mode == tf.estimator.ModeKeys.TRAIN:
        is_trainable = True
      elif (mode == tf.estimator.ModeKeys.EVAL or
            mode == tf.estimator.ModeKeys.PREDICT):
        is_trainable = False
      else:
        is_trainable = None  # unknown mode: leave the configs untouched

      if is_trainable is not None:
        for sub_config in (model_config.hg_config, model_config.sv_config,
                           model_config.rc_config, model_config.out_config):
          sub_config.is_trainable = is_trainable

      # get model
      out_heatmap, mid_heatmap, end_points = get_model(
          ch_in=features, model_config=model_config, scope='model')

      tf.logging.info('[model_fn] feature shape=%s' %
                      features.get_shape().as_list())
      tf.logging.info('[model_fn] labels  shape=%s' %
                      labels.get_shape().as_list())
      tf.logging.info('[model_fn] out_heatmap  shape=%s' %
                      out_heatmap.get_shape().as_list())
      tf.logging.info(
          '-----------------------------------------------------------')

      for n in range(0, model_config.num_of_hgstacking):
        tf.logging.info('[model_fn] mid_heatmap%d  shape=%s' %
                        (n, mid_heatmap[n].get_shape().as_list()))

    return out_heatmap, mid_heatmap, end_points

  if FLAGS.precision == 'bfloat16':
    with bfloat16.bfloat16_scope():
      logits_out_heatmap, logits_mid_heatmap, end_points = build_network()
    logits_out_heatmap = tf.cast(logits_out_heatmap, tf.float32)
    # The intermediate-supervision heatmaps feed the loss as well, so they
    # have to be brought back to float32 just like the output heatmap.
    logits_mid_heatmap = [
        tf.cast(logits_mid_heatmap[n], tf.float32)
        for n in range(0, model_config.num_of_hgstacking)
    ]
  else:
    # FLAGS.precision == 'float32'
    logits_out_heatmap, logits_mid_heatmap, end_points = build_network()

  # TODO: PREDICT mode has no dedicated handling yet. The output format of
  # the predictions (e.g. per-joint argmax position and confidence) must be
  # clarified first; then an EstimatorSpec carrying `predictions` and
  # `export_outputs={'classify': tf.estimator.export.PredictOutput(...)}`
  # should be returned here before any loss is built.

  ### output layer ===
  with tf.name_scope(name='out_post_proc',
                     values=[logits_out_heatmap, labels]):
    # heatmap activation of output layer out
    act_out_heatmaps = get_heatmap_activation(logits=logits_out_heatmap,
                                              scope='out_heatmap')
    # heatmap loss
    total_out_losssum = get_loss_heatmap(pred_heatmaps=act_out_heatmaps,
                                         label_heatmaps=labels,
                                         scope='out_loss')

  ### middle layer ===
  with tf.name_scope(name='mid_post_proc',
                     values=[logits_mid_heatmap, labels]):
    ### supervision layers ===
    act_mid_heatmap_list = []
    total_mid_losssum_list = []
    total_mid_losssum_acc = 0.0

    for stacked_hg_index in range(0, model_config.num_of_hgstacking):
      # heatmap activation of supervision layer out
      act_mid_heatmap_temp = get_heatmap_activation(
          logits=logits_mid_heatmap[stacked_hg_index],
          scope='mid_heatmap_' + str(stacked_hg_index))
      # heatmap loss
      total_mid_losssum_temp = get_loss_heatmap(
          pred_heatmaps=act_mid_heatmap_temp,
          label_heatmaps=labels,
          scope='mid_loss_' + str(stacked_hg_index))

      # collect loss and heatmap in list
      act_mid_heatmap_list.append(act_mid_heatmap_temp)
      total_mid_losssum_list.append(total_mid_losssum_temp)
      total_mid_losssum_acc += total_mid_losssum_temp

  ### total loss ===
  with tf.name_scope(name='total_loss',
                     values=[total_out_losssum, total_mid_losssum_acc]):
    # Collect weight regularizer loss =====
    loss_regularizer = tf.losses.get_regularization_loss()
    # sum up all losses =====
    loss = total_out_losssum + total_mid_losssum_acc + loss_regularizer

  host_call = None
  summary_hook = None
  train_op = None

  if mode == tf.estimator.ModeKeys.TRAIN:
    # Compute the current epoch and associated learning rate from global_step.
    global_step = tf.train.get_global_step()
    batchnum_per_epoch = np.floor(FLAGS.num_train_images /
                                  FLAGS.train_batch_size)

    current_epoch = (tf.cast(global_step, tf.float32) / batchnum_per_epoch)
    learning_rate = learning_rate_schedule(current_epoch=current_epoch)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          name='RMSprop_opt')
    if FLAGS.use_tpu:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch normalization requires UPDATE_OPS to be added as a dependency to
    # the train operation: when training, the moving_mean and
    # moving_variance need to be updated.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)

    if FLAGS.is_tensorboard_summary:
      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [model_config['batch_size']].
      gs_t = tf.reshape(global_step, [1])
      loss_t = tf.reshape(loss, [1])
      lr_t = tf.reshape(learning_rate, [1])
      ce_t = tf.reshape(current_epoch, [1])

      if FLAGS.use_tpu:
        # NOTE: the per-stack mid losses are not forwarded to the host call.
        host_call = (tb_summary_fn_tpu, [gs_t, loss_t, lr_t, ce_t])
      else:
        ## create tflog dir
        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        tb_logdir_path = FLAGS.tflogs_dir
        tb_logdir = "{}/run-{}/".format(tb_logdir_path, now)
        tf.logging.info('[model_fn] tf summary at %s' % tb_logdir)

        if not tf.gfile.Exists(tb_logdir_path):
          tf.gfile.MakeDirs(tb_logdir_path)

        # All summaries below go through tf.summary so that
        # tf.summary.merge_all() (used by the hook) actually collects them.
        tf.summary.scalar('loss', loss)
        for n in range(0, model_config.num_of_hgstacking):
          # NOTE(review): the four per-joint tags all receive the same
          # per-stack loss value; a per-joint breakdown is not computed here.
          tf.summary.scalar('mid_loss_head' + str(n),
                            total_mid_losssum_list[n])
          tf.summary.scalar('mid_loss_neck' + str(n),
                            total_mid_losssum_list[n])
          tf.summary.scalar('mid_loss_Rshoulder' + str(n),
                            total_mid_losssum_list[n])
          tf.summary.scalar('mid_loss_Lshoulder' + str(n),
                            total_mid_losssum_list[n])

        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('current_epoch', current_epoch)

        tf.logging.info('Create SummarySaveHook.')
        summary_hook = tf.train.SummarySaverHook(
            save_steps=FLAGS.summary_step,
            output_dir=tb_logdir,
            summary_op=tf.summary.merge_all())

  if FLAGS.use_tpu:
    # in case of TPUEstimator metric_ops must be in a form of tuple
    metric_ops = (metric_fn, [labels, logits_out_heatmap])
    tfestimator = tpu_estimator.TPUEstimatorSpec(mode=mode,
                                                 loss=loss,
                                                 train_op=train_op,
                                                 host_call=host_call,
                                                 eval_metrics=metric_ops)
  else:
    # in case of Estimator metric_ops must be in a form of dictionary
    metric_ops = metric_fn(labels, logits_out_heatmap)
    # Only hand over the hook when one was created: EstimatorSpec rejects a
    # hook list that contains None.
    training_hooks = [summary_hook] if summary_hook is not None else None
    tfestimator = tf.estimator.EstimatorSpec(mode=mode,
                                             loss=loss,
                                             train_op=train_op,
                                             eval_metric_ops=metric_ops,
                                             training_hooks=training_hooks)

  return tfestimator
def inception_model_fn(features, labels, mode, params):
  """Inception v2 model using Estimator API.

  Args:
    features: input batch; it is passed through `tensor_transform_fn` with
      `params['input_perm']` before entering the network.
    labels: class indices; they are one-hot encoded to `FLAGS.num_classes`
      classes for the loss.
    mode: one of `tf.estimator.ModeKeys` (TRAIN, EVAL, PREDICT).
    params: dictionary of parameters; only `params['input_perm']` is read
      here.

  Returns:
    A `tf.estimator.EstimatorSpec` carrying the predictions in PREDICT mode,
    otherwise a `TPUEstimatorSpec` with the loss, the train op (TRAIN only),
    the summary host call (TRAIN only) and the eval metrics (EVAL only).

  Raises:
    ValueError: in TRAIN mode, if `FLAGS.optimizer` is not one of 'sgd',
      'momentum' or 'RMS'.
  """
  num_classes = FLAGS.num_classes
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  is_eval = (mode == tf.estimator.ModeKeys.EVAL)
  features = tensor_transform_fn(features, params['input_perm'])

  arg_scope_kwargs = {
      'batch_norm_decay': BATCH_NORM_DECAY,
      'batch_norm_epsilon': BATCH_NORM_EPSILON,
  }
  if FLAGS.clear_update_collections:
    # updates_collections must be set to None in order to use fused batchnorm
    arg_scope_kwargs['updates_collections'] = None

  with bfloat16.bfloat16_scope():
    with arg_scope(inception.inception_v2_arg_scope(**arg_scope_kwargs)):
      logits, end_points = inception.inception_v2(
          features,
          num_classes,
          is_training=is_training,
          replace_separable_convolution=True)

  # Bring the network outputs back to float32 for the loss and metrics.
  logits = tf.cast(logits, tf.float32)
  for k in end_points.keys():
    end_points[k] = tf.cast(end_points[k], tf.float32)

  # The predictions dict is the end_points dict itself, extended in place.
  predictions = end_points
  predictions.update({
      'classes': tf.argmax(input=logits, axis=1),
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  })

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
      not FLAGS.use_tpu):
    # Print predicted classes first, then the labels they belong to.
    with tf.control_dependencies([
        tf.Print(predictions['classes'], [predictions['classes']],
                 summarize=FLAGS.eval_batch_size,
                 message='prediction: ')
    ]):
      labels = tf.Print(labels, [labels],
                        summarize=FLAGS.eval_batch_size,
                        message='label: ')

  one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

  loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                         logits=logits,
                                         weights=1.0,
                                         label_smoothing=0.1)
  # L2 weight decay on all trainable variables except batch normalization.
  loss += WEIGHT_DECAY * tf.add_n([
      tf.nn.l2_loss(v)
      for v in tf.trainable_variables()
      if 'batch_normalization' not in v.name
  ])

  initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
  if FLAGS.use_learning_rate_warmup:
    # Adjust initial learning rate to match final warmup rate
    warmup_decay = FLAGS.learning_rate_decay**(
        (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
        FLAGS.learning_rate_decay_epochs)
    adj_initial_learning_rate = initial_learning_rate * warmup_decay

  final_learning_rate = 0.0001 * initial_learning_rate

  host_call = None
  train_op = None
  if is_training:
    batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
    global_step = tf.train.get_or_create_global_step()
    current_epoch = tf.cast(
        (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

    learning_rate = tf.train.exponential_decay(
        learning_rate=initial_learning_rate,
        global_step=global_step,
        decay_steps=int(FLAGS.learning_rate_decay_epochs * batches_per_epoch),
        decay_rate=FLAGS.learning_rate_decay,
        staircase=True)

    if FLAGS.use_learning_rate_warmup:
      # Constant rate `wlr` during the cold epochs, then a linear increase
      # until `exp_decay_start`, after which the exponential decay applies.
      wlr = 0.1 * adj_initial_learning_rate
      wlr_height = tf.cast(
          0.9 * adj_initial_learning_rate /
          (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
          tf.float32)
      epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
      exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                         FLAGS.learning_rate_decay_epochs)
      lin_inc_lr = tf.add(
          wlr,
          tf.multiply(
              tf.cast(tf.subtract(current_epoch, epoch_offset), tf.float32),
              wlr_height))
      learning_rate = tf.where(
          tf.greater_equal(current_epoch, FLAGS.cold_epochs),
          (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                    learning_rate, lin_inc_lr)), wlr)

    # Set a minimum boundary for the learning rate.
    learning_rate = tf.maximum(learning_rate,
                               final_learning_rate,
                               name='learning_rate')

    if FLAGS.optimizer == 'sgd':
      tf.logging.info('Using SGD optimizer')
      optimizer = tf.train.GradientDescentOptimizer(
          learning_rate=learning_rate)
    elif FLAGS.optimizer == 'momentum':
      tf.logging.info('Using Momentum optimizer')
      optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9)
    elif FLAGS.optimizer == 'RMS':
      tf.logging.info('Using RMS optimizer')
      optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)
    else:
      # The message needs a placeholder for its argument, and execution must
      # not continue without an optimizer.
      tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)
      raise ValueError('Unknown optimizer: %s' % FLAGS.optimizer)

    if FLAGS.use_tpu:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step=global_step)
    if FLAGS.moving_average:
      ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                              num_updates=global_step)
      variables_to_average = (tf.trainable_variables() +
                              tf.moving_average_variables())
      with tf.control_dependencies([train_op]), tf.name_scope(
          'moving_average'):
        train_op = ema.apply(variables_to_average)

    # To log the loss, current learning rate, and epoch for Tensorboard, the
    # summary op needs to be run on the host CPU via host_call. host_call
    # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
    # dimension. These Tensors are implicitly concatenated to
    # [params['batch_size']].
    gs_t = tf.reshape(global_step, [1])
    loss_t = tf.reshape(loss, [1])
    lr_t = tf.reshape(learning_rate, [1])
    ce_t = tf.reshape(current_epoch, [1])

    def host_call_fn(gs, loss, lr, ce):
      """Training host call. Creates scalar summaries for training metrics.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `host_call`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `host_call`.

      Args:
        gs: `Tensor with shape `[batch]` for the global_step
        loss: `Tensor` with shape `[batch]` for the training loss.
        lr: `Tensor` with shape `[batch]` for the learning_rate.
        ce: `Tensor` with shape `[batch]` for the current_epoch.

      Returns:
        List of summary ops to run on the CPU host.
      """
      gs = gs[0]
      with summary.create_file_writer(FLAGS.model_dir).as_default():
        with summary.always_record_summaries():
          summary.scalar('loss', tf.reduce_mean(loss), step=gs)
          summary.scalar('learning_rate', tf.reduce_mean(lr), step=gs)
          summary.scalar('current_epoch', tf.reduce_mean(ce), step=gs)

          return summary.all_summary_ops()

    host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

  eval_metrics = None
  if is_eval:

    def metric_fn(labels, logits):
      """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, ]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
      predictions = tf.argmax(logits, axis=1)
      top_1_accuracy = tf.metrics.accuracy(labels, predictions)
      in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
      top_5_accuracy = tf.metrics.mean(in_top_5)

      return {
          'accuracy': top_1_accuracy,
          'accuracy@5': top_5_accuracy,
      }

    eval_metrics = (metric_fn, [labels, logits])

  return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
def model_fn(features, labels, mode, params):
  """Mobilenet v1 model using Estimator API.

  Args:
    features: input batch; it is passed through `tensor_transform_fn` with
      `params['input_perm']` before entering the network.
    labels: class indices; they are one-hot encoded to `FLAGS.num_classes`
      classes for the loss.
    mode: one of `tf.estimator.ModeKeys` (TRAIN, EVAL, PREDICT).
    params: dictionary of parameters; only `params['input_perm']` is read
      here.

  Returns:
    A `tf.estimator.EstimatorSpec` carrying the predictions in PREDICT mode,
    otherwise a `TPUEstimatorSpec` with the loss, the train op (TRAIN only)
    and the eval metrics (EVAL only).

  Raises:
    ValueError: in TRAIN mode, if `FLAGS.optimizer` is not one of 'sgd',
      'momentum' or 'RMS'.
  """
  num_classes = FLAGS.num_classes
  training_active = (mode == tf.estimator.ModeKeys.TRAIN)
  eval_active = (mode == tf.estimator.ModeKeys.EVAL)

  features = tensor_transform_fn(features, params['input_perm'])

  # NOTE(review): the original code branched on
  # FLAGS.clear_update_collections here, but both branches built the network
  # identically (updates_collections was never forwarded to the arg scope),
  # so a single construction path is kept. If fused batch norm needs
  # updates_collections=None, it still has to be wired into
  # mobilenet_v1_arg_scope -- TODO confirm.
  with bfloat16.bfloat16_scope():
    with arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
      logits, end_points = mobilenet_v1.mobilenet_v1(
          features,
          num_classes,
          is_training=training_active,
          depth_multiplier=FLAGS.depth_multiplier)

  # Bring the network outputs back to float32 for the loss and metrics.
  logits = tf.cast(logits, tf.float32)
  for k in end_points.keys():
    end_points[k] = tf.cast(end_points[k], tf.float32)

  predictions = {
      'classes': tf.argmax(input=logits, axis=1),
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
      not FLAGS.use_tpu):
    # Print predicted classes first, then the labels they belong to.
    with tf.control_dependencies([
        tf.Print(predictions['classes'], [predictions['classes']],
                 summarize=FLAGS.eval_batch_size,
                 message='prediction: ')
    ]):
      labels = tf.Print(labels, [labels],
                        summarize=FLAGS.eval_batch_size,
                        message='label: ')

  one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

  loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                         logits=logits,
                                         weights=1.0,
                                         label_smoothing=0.1)
  # L2 weight decay on all trainable variables except batch normalization.
  loss += WEIGHT_DECAY * tf.add_n([
      tf.nn.l2_loss(v)
      for v in tf.trainable_variables()
      if 'batch_normalization' not in v.name
  ])

  initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
  final_learning_rate = 0.0001 * initial_learning_rate

  train_op = None
  if training_active:
    batches_per_epoch = _NUM_TRAIN_IMAGES // FLAGS.train_batch_size
    global_step = tf.train.get_or_create_global_step()

    learning_rate = tf.train.exponential_decay(
        learning_rate=initial_learning_rate,
        global_step=global_step,
        decay_steps=FLAGS.learning_rate_decay_epochs * batches_per_epoch,
        decay_rate=FLAGS.learning_rate_decay,
        staircase=True)

    # Set a minimum boundary for the learning rate.
    learning_rate = tf.maximum(learning_rate,
                               final_learning_rate,
                               name='learning_rate')

    if FLAGS.optimizer == 'sgd':
      tf.logging.info('Using SGD optimizer')
      optimizer = tf.train.GradientDescentOptimizer(
          learning_rate=learning_rate)
    elif FLAGS.optimizer == 'momentum':
      tf.logging.info('Using Momentum optimizer')
      optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9)
    elif FLAGS.optimizer == 'RMS':
      tf.logging.info('Using RMS optimizer')
      optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)
    else:
      # The message needs a placeholder for its argument, and execution must
      # not continue without an optimizer.
      tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)
      raise ValueError('Unknown optimizer: %s' % FLAGS.optimizer)

    if FLAGS.use_tpu:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step=global_step)
    if FLAGS.moving_average:
      ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                              num_updates=global_step)
      variables_to_average = (tf.trainable_variables() +
                              tf.moving_average_variables())
      with tf.control_dependencies([train_op]), tf.name_scope(
          'moving_average'):
        train_op = ema.apply(variables_to_average)

  eval_metrics = None
  if eval_active:

    def metric_fn(labels, predictions):
      """Computes accuracy from the argmax of `predictions` over axis 1."""
      accuracy = tf.metrics.accuracy(
          labels, tf.argmax(input=predictions, axis=1))
      return {'accuracy': accuracy}

    # Evaluate either on the raw logits or on the 'Predictions' end point.
    if FLAGS.use_logits:
      eval_predictions = logits
    else:
      eval_predictions = end_points['Predictions']

    eval_metrics = (metric_fn, [labels, eval_predictions])

  return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        eval_metrics=eval_metrics)
def model_fn(features, labels, mode, params):
  """Builds the ShuffleNet TPUEstimatorSpec for TRAIN and EVAL modes.

  Args:
    features: a half precision tensor with shape
      [height, width, 3, batch_size]; it represents RGB images with values
      in [0, 1].
    labels: class indices; they are one-hot encoded to
      `params['num_classes']` classes for the loss.
    mode: `tf.estimator.ModeKeys.TRAIN` or `tf.estimator.ModeKeys.EVAL`.
    params: dictionary read for 'num_classes', 'depth_multiplier',
      'use_bfloat16', 'weight_decay', 'model_dir' and 'iterations_per_loop';
      it is also forwarded to `get_learning_rate`.

  Returns:
    A `tf.contrib.tpu.TPUEstimatorSpec`.
  """
  # Inference will happen in another way.
  assert mode != tf.estimator.ModeKeys.PREDICT

  def network(images, is_training):
    """Runs ShuffleNet with the class count and width from `params`."""
    return shufflenet(images,
                      is_training,
                      num_classes=params['num_classes'],
                      depth_multiplier=params['depth_multiplier'])

  images = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC
  is_training = mode == tf.estimator.ModeKeys.TRAIN

  if not params['use_bfloat16']:
    logits = network(images, is_training)
  else:
    with bfloat16.bfloat16_scope():
      logits = network(images, is_training)
    logits = tf.to_float(logits)  # to full precision

  with tf.name_scope('weight_decay'):
    add_weight_decay(params['weight_decay'])
    regularization_loss = tf.losses.get_regularization_loss()

  with tf.name_scope('cross_entropy'):
    one_hot_labels = tf.one_hot(labels, params['num_classes'])
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_labels,
        label_smoothing=LABEL_SMOOTHING)

  total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

  if mode == tf.estimator.ModeKeys.EVAL:
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, [labels, logits]))

  assert mode == tf.estimator.ModeKeys.TRAIN

  with tf.variable_scope('learning_rate_schedule'):
    global_step = tf.train.get_global_step()
    learning_rate = get_learning_rate(global_step, params)

  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    with tf.variable_scope('optimizer'):
      momentum_optimizer = tf.train.MomentumOptimizer(
          learning_rate, momentum=MOMENTUM, use_nesterov=USE_NESTEROV)
      sharded_optimizer = tpu_optimizer.CrossShardOptimizer(
          momentum_optimizer)
      train_op = sharded_optimizer.minimize(total_loss, global_step)

  # Chain the moving-average update after the optimizer step.
  with tf.control_dependencies([train_op]):
    with tf.name_scope('ema'):
      averager = tf.train.ExponentialMovingAverage(
          decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
      train_op = averager.apply(tf.trainable_variables())

  with tf.name_scope('train_accuracy_calculation'):
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.to_float(tf.equal(labels, predictions))
    train_accuracy = tf.reduce_mean(is_correct, axis=0)

  # Every tensor sent to the host call gets a leading dimension of size 1.
  tensors_to_summarize = [
      tf.reshape(global_step, [1]),
      tf.reshape(total_loss, [1]),
      tf.reshape(cross_entropy, [1]),
      tf.reshape(regularization_loss, [1]),
      tf.reshape(learning_rate, [1]),
      tf.reshape(train_accuracy, [1])
  ]

  def host_call_fn(global_step, total_loss, cross_entropy,
                   regularization_loss, learning_rate, train_accuracy):
    """Writes the training scalars as summaries.

    The arguments correspond, in order, to `tensors_to_summarize`; the first
    element of each is what gets logged.
    """
    step = global_step[0]
    tagged_values = (
        ('entire_loss', total_loss),
        ('cross_entropy_loss', cross_entropy),
        ('regularization_loss', regularization_loss),
        ('learning_rate', learning_rate),
        ('train_accuracy', train_accuracy),
    )
    writer = summary.create_file_writer(
        params['model_dir'], max_queue=params['iterations_per_loop'])
    with writer.as_default():
      with summary.always_record_summaries():
        for tag, value in tagged_values:
          summary.scalar(tag, value[0], step=step)
        return summary.all_summary_ops()

  return tf.contrib.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      host_call=(host_call_fn, tensors_to_summarize))
def model_fn(features, labels, mode, params):
    """Estimator model function for the DenseNet ImageNet classifiers.

    The network variant is chosen by `FLAGS.network_depth` (121, 169 or
    201; any other value logs a message and uses the 121 variant). The
    network is built inside a bfloat16 scope and its logits are cast to
    float32 before the loss is computed.

    Args:
        features: input batch passed straight to the DenseNet builder.
        labels: one-hot labels; used as `onehot_labels` for the loss and
            arg-maxed along axis 1 for the accuracy metric.
        mode: a `tf.estimator.ModeKeys` value.
        params: dict providing "batches_per_epoch" and "batch_size".

    Returns:
        A `tpu_estimator.TPUEstimatorSpec` holding the loss, the train op
        (None unless mode is TRAIN) and the eval metrics (None unless mode
        is EVAL).
    """
    tf.logging.info("model_fn")

    training = (mode == tf.estimator.ModeKeys.TRAIN)
    builders_by_depth = {
        121: densenet_model.densenet_imagenet_121,
        169: densenet_model.densenet_imagenet_169,
        201: densenet_model.densenet_imagenet_201,
    }

    with bfloat16.bfloat16_scope():
        build_densenet = builders_by_depth.get(FLAGS.network_depth)
        if build_densenet is None:
            tf.logging.info("Number of layers not supported, revert to 121")
            build_densenet = densenet_model.densenet_imagenet_121
        logits = build_densenet(features, is_training=training)
    logits = tf.cast(logits, tf.float32)

    # Calculate loss, which includes softmax cross entropy and L2
    # regularization.
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=labels)

    # Add weight decay to the loss. We exclude weight decay on the batch
    # normalization variables because it slightly improves accuracy.
    l2_terms = [
        tf.nn.l2_loss(var)
        for var in tf.trainable_variables()
        if "batch_normalization" not in var.name
    ]
    loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(l2_terms)

    global_step = tf.train.get_global_step()
    steps_as_float = tf.cast(global_step, tf.float32)
    current_epoch = steps_as_float / params["batches_per_epoch"]
    learning_rate = learning_rate_schedule(current_epoch)

    batch_size = params["batch_size"]

    def _repeat_per_example(scalar):
        """Tile `scalar` into a [batch_size, 1] tensor."""
        tiled = tf.tile(tf.expand_dims(scalar, 0), [batch_size])
        return tf.reshape(tiled, [batch_size, 1])

    # TODO(chrisying): this is a hack to get the LR and epoch for Tensorboard.
    # Reimplement this when TPU training summaries are supported.
    lr_repeat = _repeat_per_example(learning_rate)
    ce_repeat = _repeat_per_example(current_epoch)

    train_op = None
    if training:
        optimizer = tpu_optimizer.CrossShardOptimizer(
            tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=_MOMENTUM))

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits, lr_repeat, ce_repeat):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            predicted_classes = tf.argmax(logits, axis=1)
            true_classes = tf.argmax(labels, axis=1)
            return {
                "accuracy": tf.metrics.accuracy(true_classes,
                                                predicted_classes),
                "learning_rate": tf.metrics.mean(lr_repeat),
                "current_epoch": tf.metrics.mean(ce_repeat),
            }

        eval_metrics = (metric_fn, [labels, logits, lr_repeat, ce_repeat])

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metrics=eval_metrics)