def testMinimize(self):
    """Ensure that minimize actually lowers the loss.

    Builds the quadratic loss ``sum(w * w)`` over a randomly initialised
    10-element variable, runs a single optimizer update, and asserts that the
    loss evaluated after the update is strictly smaller than before it.
    """
    # `cached_session` replaces the deprecated `test_session`; `testSwap`
    # already uses it.
    with self.cached_session():
        w_init = np.random.randn(10)
        w = tf.Variable(w_init, dtype=dtypes.float32)
        loss = tf.reduce_sum(w * w)

        igt_opt = exp_igt_optimizer.ExpIgtOptimizer(
            learning_rate=0.01, tail_fraction=2.)
        igt_update = igt_opt.minimize(loss)
        tf_variables.global_variables_initializer().run()

        loss_pre = loss.eval()
        igt_update.run()
        loss_post = loss.eval()
        self.assertLess(loss_post, loss_pre)
def testSwap(self):
    """Check that swap_true_and_shifted exchanges a variable with its slot.

    After initialisation both the variable and its 'true_param' slot are
    expected to hold the initial value. The variable is then overwritten with
    zeros (the slot must be unaffected), and finally the swap op is run, after
    which the variable and the slot must have traded values.
    """
    with self.cached_session() as sess:
        initial = np.random.randn(10)
        variable = tf.Variable(initial, dtype=dtypes.float32)
        optimizer = exp_igt_optimizer.ExpIgtOptimizer(
            learning_rate=0.01, tail_fraction=2.)
        # The update op itself is never run; minimize() is called so that the
        # optimizer's slots exist before they are looked up.
        optimizer.minimize(tf.reduce_sum(variable * variable))
        true_param = optimizer.get_slot(variable, 'true_param')
        tf_variables.global_variables_initializer().run()

        def expect(variable_value, slot_value):
            # Compare the current variable and slot contents to expectations.
            self.assertAllCloseAccordingToType(variable_value, variable.eval())
            self.assertAllCloseAccordingToType(slot_value, true_param.eval())

        # Freshly initialised: variable and slot agree.
        expect(initial, initial)

        # Overwriting the variable leaves the slot untouched.
        zeros = np.zeros(10)
        sess.run(variable.assign(zeros))
        expect(zeros, initial)

        # Swapping exchanges the two values.
        optimizer.swap_true_and_shifted().run()
        expect(initial, zeros)
def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

    Args:
      features: `Tensor` of batched images. If transpose_input is enabled, it
        is transposed to device layout and reshaped to 1D tensor. If a dict is
        passed, the images are read from its 'feature' entry.
      labels: `Tensor` of labels for the data samples
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
      params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

    Returns:
      A `TPUEstimatorSpec` for the model. In PREDICT mode a
      `tf.estimator.EstimatorSpec` carrying the predictions is returned
      instead.

    Raises:
      ValueError: if `params['dropblock_groups']` names a group outside 1..4,
        if `params['precision']` is neither 'bfloat16' nor 'float32', if
        `params['enable_lars']` is set, or if `FLAGS.optimizer` is not one of
        'momentum', 'adam' or 'eigt'.
    """
    if isinstance(features, dict):
        features = features['feature']

    # In most cases, the default data format NCHW instead of NHWC should be
    # used for a significant performance boost on GPU/TPU. NHWC should be used
    # only if the network needs to be run on CPU since the pooling operations
    # are only supported on NHWC.
    channels_first = params['data_format'] == 'channels_first'
    if channels_first:
        assert not params['transpose_input']  # channels_first only for GPU
        features = tf.transpose(features, [0, 3, 1, 2])

    if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
        image_size = tf.sqrt(tf.shape(features)[0] / (3 * tf.shape(labels)[0]))
        features = tf.reshape(features, [image_size, image_size, 3, -1])
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance. The per-channel
    # statistics must broadcast along the channel axis, which is axis 1 after
    # the NCHW transpose above and the last axis otherwise.
    stats_shape = [1, 3, 1, 1] if channels_first else [1, 1, 3]
    features -= tf.constant(
        MEAN_RGB, shape=stats_shape, dtype=features.dtype)
    features /= tf.constant(
        STDDEV_RGB, shape=stats_shape, dtype=features.dtype)

    # DropBlock keep_prob for the 4 block groups of ResNet architecture.
    # None means applying no DropBlock at the corresponding block group.
    dropblock_keep_probs = [None] * 4
    if params['dropblock_groups']:
        # Scheduled keep_prob for DropBlock.
        train_steps = tf.cast(params['train_steps'], tf.float32)
        current_step = tf.cast(tf.train.get_global_step(), tf.float32)
        current_ratio = current_step / train_steps
        dropblock_keep_prob = (
            1 - current_ratio * (1 - params['dropblock_keep_prob']))

        # Computes DropBlock keep_prob for different block groups of ResNet.
        dropblock_groups = [
            int(x) for x in params['dropblock_groups'].split(',')
        ]
        for block_group in dropblock_groups:
            if block_group < 1 or block_group > 4:
                raise ValueError(
                    'dropblock_groups should be a comma separated list of '
                    'integers between 1 and 4 (dropblock_groups: {}).'.format(
                        params['dropblock_groups']))
            dropblock_keep_probs[block_group - 1] = 1 - (
                (1 - dropblock_keep_prob) / 4.0**(4 - block_group))

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        network = resnet_model.resnet_v1(
            resnet_depth=params['resnet_depth'],
            num_classes=params['num_label_classes'],
            dropblock_size=params['dropblock_size'],
            dropblock_keep_probs=dropblock_keep_probs,
            data_format=params['data_format'])
        return network(
            inputs=features,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    if params['precision'] == 'bfloat16':
        with tf.contrib.tpu.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif params['precision'] == 'float32':
        logits = build_network()
    else:
        # Without this branch `logits` would be unbound and the function
        # would fail further down with an unrelated-looking error.
        raise ValueError(
            "Unsupported precision: {} (expected 'bfloat16' or "
            "'float32').".format(params['precision']))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2
    # regularization.
    one_hot_labels = tf.one_hot(labels, params['num_label_classes'])
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_labels,
        label_smoothing=params['label_smoothing'])

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + params['weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from
        # global_step.
        global_step = tf.train.get_global_step()
        steps_per_epoch = (
            params['num_train_images'] / params['train_batch_size'])
        current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)

        # LARS is a large batch optimizer, but it is rejected here. Fail
        # before building any optimizer state that would be thrown away.
        if params['enable_lars']:
            raise ValueError(
                'LARS unexpected in the context of IGT experiments.')

        learning_rate = linear_learning_rate_schedule(params, global_step)

        if FLAGS.optimizer == 'momentum':
            tf.logging.info('Using MomentumOptimizer ({}).'.format(
                params['momentum']))
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate,
                momentum=params['momentum'],
                use_nesterov=False)
        elif FLAGS.optimizer == 'adam':
            tf.logging.info('Using AdamOptimizer')
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        elif FLAGS.optimizer == 'eigt':
            tf.logging.info('Using ExpIgtOptimizer {} tail: {}'.format(
                FLAGS.igt_optimizer, FLAGS.tail_fraction))
            optimizer = exp_igt_optimizer.ExpIgtOptimizer(
                learning_rate,
                tail_fraction=FLAGS.tail_fraction,
                optimizer=FLAGS.igt_optimizer)
        else:
            raise ValueError('{} is not a supported optimizer'.format(
                FLAGS.optimizer))

        if params['use_tpu']:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To
            # the user, this should look like regular synchronous training.
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency
        # to the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if not params['skip_host_call']:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor` with shape `[batch]` for the global_step
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                # Host call fns are executed params['iterations_per_loop']
                # times after one TPU loop is finished, setting max_queue
                # value to the same as number of iterations will make the
                # summary writer only flush the data to storage once per loop.
                with summary.create_file_writer(
                        get_model_dir(params),
                        max_queue=params['iterations_per_loop']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for
            # Tensorboard, the summary op needs to be run on the host CPU via
            # host_call. host_call expects [batch_size, ...] Tensors, thus
            # reshape to introduce a batch dimension. These Tensors are
            # implicitly concatenated to [params['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])
            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    else:
        train_op = None

    eval_metrics = None
    scaffold_fn = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

        if FLAGS.mode == 'eval_igt' and FLAGS.igt_eval_mode == 'true':
            tf.logging.info('Using true param loading saver.')

            def scaffold_fn_true_params():
                """Returns a scaffold that loads the true values into vars."""
                # Trainable variables are restored from the checkpoint entry
                # named '<var>/true_param'; every other global variable is
                # restored from the entry matching its own op name.
                var_mapping = {}
                trainable_vars = set(tf.trainable_variables())
                for var in tf.global_variables():
                    if var in trainable_vars:
                        var_mapping[var.op.name + '/true_param'] = var
                    else:
                        var_mapping[var.op.name] = var

                tf.logging.info('Mapping: {}'.format(var_mapping))
                saver = tf.train.Saver(var_list=var_mapping, sharded=True)
                return tf.train.Scaffold(saver=saver)

            scaffold_fn = scaffold_fn_true_params

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        host_call=host_call,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
def doTestApplyGradients(self, use_resource=False):
    """Validate the IGT update (i.e. apply_gradients) against a python impl.

    Two 2-element variables are created, the optimizer's slots are checked,
    and then two successive updates with fixed gradients are applied. After
    initialisation and after each update the optimizer state is compared with
    a pair of `IgtValidator` objects through `self._validate`.

    Args:
      use_resource: If True, the variables are created as
        `resource_variable_ops.ResourceVariable`; otherwise as
        `tf_variables.Variable`.
    """
    # TODO(manzagop): try dtypes.half and dtypes.float64:
    for dtype in [dtypes.float32]:
        print('running for dtype {}'.format(dtype))
        # `cached_session` replaces the deprecated `test_session`.
        with self.cached_session():
            # Set up 2 variables and constants for their gradients.
            var0_value = np.array([1.0, 2.0])
            var1_value = np.array([3.0, 4.0])
            if use_resource:
                var0 = resource_variable_ops.ResourceVariable(
                    var0_value, dtype=dtype)
                var1 = resource_variable_ops.ResourceVariable(
                    var1_value, dtype=dtype)
            else:
                var0 = tf_variables.Variable(var0_value, dtype=dtype)
                var1 = tf_variables.Variable(var1_value, dtype=dtype)
            grads0 = tf.placeholder(dtype, shape=var0.get_shape())
            grads1 = tf.placeholder(dtype, shape=var1.get_shape())

            # TODO(manzagop): use a different tail fraction once validator
            # support.
            igt_opt = exp_igt_optimizer.ExpIgtOptimizer(
                learning_rate=LEARNING_RATE, tail_fraction=1.)
            igt_update = igt_opt.apply_gradients(
                list(zip([grads0, grads1], [var0, var1])),
                global_step=tf.train.get_global_step())
            tf_variables.global_variables_initializer().run()

            # Validate we have slots.
            expected_slot_names = {'estimate', 'true_param', 'update'}
            self.assertEqual(expected_slot_names,
                             set(igt_opt.get_slot_names()))
            # The trainable collection does not change inside the loops below,
            # so it is fetched once.
            trainable = tf_variables.trainable_variables()
            for slot_name in expected_slot_names:
                for var in [var0, var1]:
                    slot = igt_opt.get_slot(var, slot_name)
                    self.assertEqual(slot.get_shape(), var.get_shape())
                    self.assertNotIn(slot, trainable)

            # Validate initial values.
            validators = [
                IgtValidator(var0_value, LEARNING_RATE),
                IgtValidator(var1_value, LEARNING_RATE)
            ]
            self._validate(igt_opt, [var0, var1], validators)

            # Run first update and validate.
            g0_first = np.array([0.1, 0.1])
            g1_first = np.array([0.01, 0.01])
            igt_update.run({grads0: g0_first, grads1: g1_first})

            validators[0].update(g0_first)
            validators[1].update(g1_first)
            self._validate(igt_opt, [var0, var1], validators)

            # Run second update and validate.
            g0_second = np.array([0.1, 0.1])
            g1_second = np.array([0.01, 0.01])
            igt_update.run({grads0: g0_second, grads1: g1_second})

            validators[0].update(g0_second)
            validators[1].update(g1_second)
            self._validate(igt_opt, [var0, var1], validators)