def get_hit_rate(self):
    """Returns the hit rate accumulated in `self.var` and drains the counters.

    `self.var[0]` is read as the total count and `self.var[1]` as the hit
    count. The rate is `hits / max(1, total)` in float32, so a zero total
    yields a rate of 0 rather than a division by zero.

    Returns:
      A float32 tensor holding the hit rate. Evaluating it also subtracts the
      counts that were just read from `self.var`.
    """
    total_count = self.var[0]
    hit_count = self.var[1]

    hits_as_float = tf.cast(hit_count, tf.float32)
    # Clamp the denominator to at least 1 to avoid dividing by zero.
    safe_total = tf.maximum(
        tf.constant(1, dtype=tf.float32),
        tf.cast(total_count, tf.float32))
    rate = hits_as_float / safe_total

    # The rate must be computed before the counters are decremented ...
    with tf.control_dependencies([rate]):
        drain_counters = self.var.assign_add([-total_count, -hit_count])

    # ... and the decrement must run whenever the returned tensor is fetched.
    with tf.control_dependencies([drain_counters]):
        return tf.identity(rate)
def crop_gt_masks(self, gt_mask_size):
    """Crops the ground truth binary masks and resizes them to fixed-size masks.

    Every mask in `self._masks` is cropped to the box with the same index in
    `self._boxes`, bilinearly resized to `gt_mask_size` x `gt_mask_size`, and
    then zero-padded by 2 pixels on each side of both spatial dimensions.

    NOTE(review): `self._boxes` is passed straight to
    `tf.image.crop_and_resize`, so it is presumably already in the normalized
    box format that op expects -- confirm against the caller.

    Args:
      gt_mask_size: Side length of each resized mask crop, before padding.

    Returns:
      A tensor of shape
      [num_masks, gt_mask_size + 4, gt_mask_size + 4] with the cropped,
      resized and padded masks. A graph-time assertion fails if the number of
      boxes differs from the number of masks.
    """
    num_boxes = tf.shape(self._boxes)[0]
    # Computed once and shared by the assertion, the emptiness check and the
    # box indices below.
    num_masks = tf.shape(self._masks)[0]
    assert_length = tf.Assert(tf.equal(num_boxes, num_masks), [num_masks])

    def padded_bounding_box_fn():
        return tf.reshape(
            self._masks, [-1, self._ori_height, self._ori_width, 1])

    def zeroed_box_fn():
        return tf.zeros([0, self._ori_height, self._ori_width, 1])

    # Check if there is any instance in this image or not.
    scaled_masks = tf.cond(
        num_masks > 0, padded_bounding_box_fn, zeroed_box_fn)

    with tf.control_dependencies([assert_length]):
        # Mask i is cropped with box i; the trailing channel axis is dropped.
        cropped_gt_masks = tf.image.crop_and_resize(
            image=scaled_masks,
            boxes=self._boxes,
            box_ind=tf.range(num_masks, dtype=tf.int32),
            crop_size=[gt_mask_size, gt_mask_size],
            method='bilinear')[:, :, :, 0]

    # Leave the instance axis alone; pad height and width by 2 on each side.
    cropped_gt_masks = tf.pad(
        cropped_gt_masks,
        paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
        mode='CONSTANT',
        constant_values=0.)
    return cropped_gt_masks
def pad_to_fixed_size(data, pad_value, output_shape):
    """Pads data to a fixed length at the first dimension.

    `data` is first reshaped to [-1, output_shape[1]]; rows filled with
    `pad_value` are then appended until there are output_shape[0] rows.

    Args:
      data: Tensor to be padded to output_shape.
      pad_value: A constant value assigned to the paddings.
      output_shape: The output shape of a 2D tensor, as
        [max_num_instances, dimension].

    Returns:
      The padded tensor with output_shape [max_num_instances, dimension] and
      the same dtype as `data`. A graph-time assertion fails if `data` holds
      more than max_num_instances rows.
    """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(data)[0]
    assert_length = tf.Assert(
        tf.less_equal(num_instances, max_num_instances), [num_instances])
    with tf.control_dependencies([assert_length]):
        pad_length = max_num_instances - num_instances
    # tf.ones defaults to float32 while tf.concat requires matching dtypes, so
    # cast the padding to the input's dtype. This is a no-op for float32 data.
    paddings = tf.cast(
        pad_value * tf.ones([pad_length, dimension]), data.dtype)
    padded_data = tf.concat([data, paddings], axis=0)
    padded_data = tf.reshape(padded_data, output_shape)
    return padded_data
def train_ddpg(dataset,
               policy,
               actor_optimizer=None,
               critic_optimizer=None,
               pack_transition_fn=None,
               ddpg_graph_fn=None,
               log_dir=None,
               master='local',
               task=0,
               training_steps=None,
               max_training_steps=100000,
               reuse=False,
               init_checkpoint=None,
               update_target_every_n_steps=50,
               log_every_n_steps=None,
               save_checkpoint_steps=500,
               save_summaries_steps=500):
    """Self-contained training loop for an offline DDPG-style actor-critic.

    Code inspired by OpenAI Baselines' deepq.build_train.

    Args:
      dataset: tf.data.Dataset providing transitions.
      policy: Object providing `get_a_func(is_training, reuse)` and
        `get_q_func(is_training, reuse)`, the builders for the actor and
        critic networks.
      actor_optimizer: Optional instance of an optimizer for the actor
        network. If not specified, creates an AdamOptimizer using the default
        constructor.
      critic_optimizer: Optional instance of an optimizer for the critic
        network. If not specified, creates an AdamOptimizer using the default
        constructor.
      pack_transition_fn: Optional function that performs additional
        processing of the transition. This is a convenience method for ad-hoc
        manipulation of transition data passed to the learning function after
        parsing.
      ddpg_graph_fn: Function called as
        `ddpg_graph_fn(a_func, q_func, transition)` that returns
        `(actor_loss, critic_loss, all_summaries)`. Required despite the None
        default.
      log_dir: Where to save model checkpoints and tensorboard summaries.
      master: Optional address of master worker. Specify this when doing
        distributed training.
      task: Optional worker task for distributed training. Task 0 is treated
        as the chief.
      training_steps: Optional. When set, training stops early as soon as the
        fetched global step is a multiple of this value. Max_training_steps
        remains in effect whether or not training_steps is specified.
      max_training_steps: Maximum number of training iters. If None or 0, no
        stop-at-step hook is installed.
      reuse: Forwarded to the policy's network builders; if True, reuse
        existing variables.
      init_checkpoint: Optional checkpoint to restore prior to training. If
        not provided, variables are initialized using
        global_variables_initializer().
      update_target_every_n_steps: How many training iterations between
        copying the Q network weights (scope='q_func') to the target network
        (scope='target_q_func'). None disables the copy.
      log_every_n_steps: How many training iterations between logging loss
        tensors. None disables loss logging and the step-rate summary.
      save_checkpoint_steps: How many global steps between saving TF
        variables to a checkpoint file.
      save_summaries_steps: How many global steps between saving TF
        summaries. None disables summary saving.

    Returns:
      A tuple `(np_step, done)`. `np_step` is the last `global_step` value
      fetched by the training loop (0 if the loop never ran). `done` tells
      whether `np_step` has reached `max_training_steps`; it is False when
      `max_training_steps` is None.
    """
    data_iterator = dataset.make_one_shot_iterator()
    transition = data_iterator.get_next()
    if pack_transition_fn:
        transition = pack_transition_fn(transition)

    if actor_optimizer is None:
        actor_optimizer = tf.train.AdamOptimizer()
    if critic_optimizer is None:
        critic_optimizer = tf.train.AdamOptimizer()

    a_func = policy.get_a_func(is_training=True, reuse=reuse)
    q_func = policy.get_q_func(is_training=True, reuse=reuse)
    actor_loss, critic_loss, all_summaries = ddpg_graph_fn(
        a_func, q_func, transition)

    a_func_vars = tf.contrib.framework.get_trainable_variables(scope='a_func')
    q_func_vars = framework.get_trainable_variables(scope='q_func')
    target_q_func_vars = framework.get_trainable_variables(
        scope='target_q_func')

    global_step = tf.train.get_or_create_global_step()

    # CRITIC OPTIMIZATION
    # Only optimize q_func and update its batchnorm params.
    critic_update_ops = tf.get_collection(
        tf.GraphKeys.UPDATE_OPS, scope='q_func')
    critic_train_op = tf.contrib.training.create_train_op(
        critic_loss,
        critic_optimizer,
        global_step=global_step,
        update_ops=critic_update_ops,
        summarize_gradients=True,
        variables_to_train=q_func_vars,
    )

    # ACTOR OPTIMIZATION
    # Only optimize a_func and update its batchnorm params. The scoped update
    # ops must be passed explicitly: with update_ops=None, create_train_op
    # falls back to the whole UPDATE_OPS collection, which would also run the
    # critic's updates a second time. global_step=None keeps the step counter
    # from being incremented twice per iteration.
    actor_update_ops = tf.get_collection(
        tf.GraphKeys.UPDATE_OPS, scope='a_func')
    actor_train_op = tf.contrib.training.create_train_op(
        actor_loss,
        actor_optimizer,
        global_step=None,
        update_ops=actor_update_ops,
        summarize_gradients=True,
        variables_to_train=a_func_vars,
    )

    # Combine losses to train both actor and critic simultaneously.
    train_op = critic_train_op + actor_train_op

    chief_hooks = []
    hooks = []

    # Save summaries periodically.
    if save_summaries_steps is not None:
        chief_hooks.append(tf.train.SummarySaverHook(
            save_steps=save_summaries_steps,
            output_dir=log_dir,
            summary_op=all_summaries))

    # Stop after max_training_steps.
    if max_training_steps:
        hooks.append(tf.train.StopAtStepHook(last_step=max_training_steps))

    # Report if loss tensor is NaN.
    hooks.append(tf.train.NanTensorHook(actor_loss))
    hooks.append(tf.train.NanTensorHook(critic_loss))

    if log_every_n_steps is not None:
        tensor_dict = {
            'global_step': global_step,
            'actor loss': actor_loss,
            'critic_loss': critic_loss
        }
        chief_hooks.append(
            tf.train.LoggingTensorHook(
                tensor_dict, every_n_iter=log_every_n_steps))

        # Measure how fast we are training per sec and save to summary.
        chief_hooks.append(tf.train.StepCounterHook(
            every_n_steps=log_every_n_steps, output_dir=log_dir))

    # If target network exists, periodically update target Q network with new
    # weights (frozen target network). We hack this by
    # abusing a LoggingTensorHook for this.
    if target_q_func_vars and update_target_every_n_steps is not None:
        # Variables are paired by sorted name, which relies on both scopes
        # declaring their variables with matching relative names.
        assign_ops = [
            var_t.assign(var)
            for var, var_t in zip(
                sorted(q_func_vars, key=lambda v: v.name),
                sorted(target_q_func_vars, key=lambda v: v.name))
        ]
        update_target_expr = tf.group(*assign_ops)
        # Fetching this dummy constant forces the grouped assignments to run.
        with tf.control_dependencies([update_target_expr]):
            update_target = tf.constant(0)
        chief_hooks.append(
            tf.train.LoggingTensorHook(
                {'update_target': update_target},
                every_n_iter=update_target_every_n_steps))

    # Save checkpoints periodically, save all of them.
    saver = tf.train.Saver(max_to_keep=None)
    chief_hooks.append(tf.train.CheckpointSaverHook(
        log_dir,
        save_steps=save_checkpoint_steps,
        saver=saver,
        checkpoint_basename='model.ckpt'))

    # Save our experiment params to checkpoint dir.
    chief_hooks.append(
        gin.tf.GinConfigSaverHook(log_dir, summarize_config=True))

    session_config = tf.ConfigProto(log_device_placement=True)

    init_fn = None
    if init_checkpoint:
        assign_fn = tf.contrib.framework.assign_from_checkpoint_fn(
            init_checkpoint, framework.get_model_variables())
        # Scaffold calls init_fn(scaffold, session); only the session is used.
        init_fn = lambda _, sess: assign_fn(sess)
    scaffold = tf.train.Scaffold(saver=saver, init_fn=init_fn)

    with tf.train.MonitoredTrainingSession(
        master=master,
        is_chief=(task == 0),
        config=session_config,
        checkpoint_dir=log_dir,
        scaffold=scaffold,
        hooks=hooks,
        chief_only_hooks=chief_hooks) as sess:
        np_step = 0
        while not sess.should_stop():
            np_step, _ = sess.run([global_step, train_op])
            if training_steps and np_step % training_steps == 0:
                break

    # max_training_steps may be None (no step limit); comparing an int with
    # None raises TypeError on Python 3, so treat "no limit" as never done.
    done = max_training_steps is not None and np_step >= max_training_steps
    return np_step, done
def _model_fn(features, labels, mode, params, model):
    """Model definition for the RetinaNet model based on ResNet-50.

    Args:
      features: the input image tensor with shape [batch_size, height, width,
        3]. The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the RetinaNet model outputs class logits and box regression
        outputs.

    Returns:
      In PREDICT mode, a tf.estimator.EstimatorSpec carrying the input image
      and the per-level class/box outputs. Otherwise a TPUEstimatorSpec to
      run training or evaluation.
    """
    cls_outputs, box_outputs = model(
        features,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_classes=params['num_classes'],
        num_anchors=len(params['aspect_ratios'] * params['num_scales']),
        is_training_bn=params['is_training_bn'])
    levels = cls_outputs.keys()

    train_mode = tf.estimator.ModeKeys.TRAIN
    eval_mode = tf.estimator.ModeKeys.EVAL

    # Prediction needs neither losses nor an optimizer, so return early.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'image': features}
        for level in levels:
            predictions.update({
                'cls_outputs_%d' % level: cls_outputs[level],
                'box_outputs_%d' % level: box_outputs[level],
            })
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Optionally warm-start from a pretrained checkpoint, training only.
    scaffold_fn = None
    if params['resnet_checkpoint'] and mode == train_mode:

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            tf.train.init_from_checkpoint(
                params['resnet_checkpoint'], {'/': 'resnet50/'})
            return tf.train.Scaffold()

    # Learning rate schedule and losses.
    global_step = tf.train.get_global_step()
    learning_rate = _learning_rate_schedule(
        params['learning_rate'],
        params['lr_warmup_step'],
        params['lr_drop_step'],
        global_step)
    # Only total_loss is optimized; cls_loss and box_loss are for logging.
    total_loss, cls_loss, box_loss = _detection_loss(
        cls_outputs, box_outputs, labels, params)

    train_op = None
    if mode == train_mode:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate, momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        batch_norm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(batch_norm_updates):
            train_op = optimizer.minimize(total_loss, global_step)

    # Evaluation only works on GPU/CPU host and batch_size=1
    eval_metrics = None
    if mode == eval_mode:
        level_range = range(params['min_level'], params['max_level'] + 1)

        def metric_fn(**kwargs):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            eval_anchors = anchors.Anchors(
                params['min_level'], params['max_level'],
                params['num_scales'], params['aspect_ratios'],
                params['anchor_scale'], params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(
                eval_anchors, params['num_classes'])
            mean_cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            mean_box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

            # Regroup the flattened per-level tensors into level-keyed dicts.
            cls_by_level = {
                lvl: kwargs['cls_outputs_%d' % lvl] for lvl in level_range
            }
            box_by_level = {
                lvl: kwargs['box_outputs_%d' % lvl] for lvl in level_range
            }
            detections = anchor_labeler.generate_detections(
                cls_by_level, box_by_level, kwargs['source_ids'])
            eval_metric = coco_metric.EvaluationMetric(
                params['val_json_file'])
            coco_metrics = eval_metric.estimator_metric_fn(
                detections, kwargs['image_scales'])

            # Loss metrics first, then the COCO metrics on top.
            output_metrics = {
                'cls_loss': mean_cls_loss,
                'box_loss': mean_box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        batch_size = params['batch_size']

        def _repeat_over_batch(scalar_loss):
            """Tiles a scalar loss into a [batch_size, 1] tensor."""
            tiled = tf.tile(tf.expand_dims(scalar_loss, 0), [batch_size])
            return tf.reshape(tiled, [batch_size, 1])

        metric_fn_inputs = {
            'cls_loss_repeat': _repeat_over_batch(cls_loss),
            'box_loss_repeat': _repeat_over_batch(box_loss),
            'source_ids': labels['source_ids'],
            'image_scales': labels['image_scales'],
        }
        for lvl in level_range:
            metric_fn_inputs['cls_outputs_%d' % lvl] = cls_outputs[lvl]
            metric_fn_inputs['box_outputs_%d' % lvl] = box_outputs[lvl]
        eval_metrics = (metric_fn, metric_fn_inputs)

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)