def efficientdet(features, model_name=None, config=None, **kwargs):
  """Build EfficientDet model."""
  if not config and not model_name:
    raise ValueError('please specify either model name or config')

  if not config:
    config = hparams_config.get_efficientdet_config(model_name)
  elif isinstance(config, dict):
    config = hparams_config.Config(config)  # wrap dict in Config object
  if kwargs:
    config.override(kwargs)

  logging.info(config)

  # build backbone features.
  features = build_backbone(features, config)
  logging.info('backbone params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build feature network.
  fpn_feats = build_feature_network(features, config)
  logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build class and box predictions.
  class_outputs, box_outputs = build_class_and_box_outputs(fpn_feats, config)
  logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  return class_outputs, box_outputs

def efficientdet(model_name=None, config=None, **kwargs):
  """Build EfficientDet model.

  Args:
    model_name: String of the model (eg. efficientdet-d0)
    config: Dict of parameters for the network
    **kwargs: other parameters.

  Returns:
    A tf.keras.Model whose outputs are
    [backbone_outputs, class_outputs, box_outputs].
  """
  if not config and not model_name:
    raise ValueError('please specify either model name or config')

  if not config:
    config = hparams_config.get_efficientdet_config(model_name)
  elif isinstance(config, dict):
    config = hparams_config.Config(config)  # wrap dict in Config object
  if kwargs:
    config.override(kwargs)

  logging.info(config)

  inputs = tf.keras.layers.Input(
      [*utils.parse_image_size(config.image_size), 3])

  # build backbone features.
  features, backbone_outputs = build_backbone(inputs, config)
  logging.info('backbone params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build feature network.
  fpn_feats = build_feature_network(features, config)
  logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build class and box predictions.
  class_outputs, box_outputs = build_class_and_box_outputs(fpn_feats, config)
  logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  return tf.keras.Model(
      inputs=inputs,
      outputs=[backbone_outputs, class_outputs, box_outputs])

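# A minimal usage sketch for the keras-style builder above, assuming
# 'efficientdet-d0' is a registered config whose default image_size is 512;
# the helper name is hypothetical.
def _example_efficientdet_keras():
  model = efficientdet(model_name='efficientdet-d0')
  images = tf.ones([1, 512, 512, 3])  # dummy batch of one 512x512 RGB image
  backbone_outputs, class_outputs, box_outputs = model(images)
  return class_outputs, box_outputs
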
def efficientdet(features, model_name=None, config=None, **kwargs):
  """Build EfficientDet model.

  Args:
    features: input tensor.
    model_name: String of the model (eg. efficientdet-d0)
    config: Dict of parameters for the network
    **kwargs: other parameters.

  Returns:
    A tuple (class_outputs, box_outputs) for predictions.
  """
  if not config and not model_name:
    raise ValueError('please specify either model name or config')

  if not config:
    config = hparams_config.get_efficientdet_config(model_name)
  elif isinstance(config, dict):
    config = hparams_config.Config(config)  # wrap dict in Config object
  if kwargs:
    config.override(kwargs)

  logging.info(config)

  # build backbone features.
  features = legacy_arch.build_backbone(features, config)
  logging.info('backbone params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build feature network.
  fpn_feats = legacy_arch.build_feature_network(features, config)
  logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build class and box predictions.
  class_box = BuildClassAndBoxOutputs(**config)
  # Invoke the layer via __call__ (not .call) so Keras builds it first.
  class_outputs, box_outputs = class_box(fpn_feats)
  logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  return class_outputs, box_outputs

def retinanet(features, model_name='retinanet-50', config=None, **kwargs):
  """RetinaNet classification and regression model."""
  if not config:
    config = hparams_config.get_retinanet_config(model_name)
  config.override(kwargs)

  min_level = config.get('min_level', 3)
  max_level = config.get('max_level', 7)
  num_classes = config.get('num_classes', 90)
  resnet_depth = config.get('resnet_depth', 50)
  use_nearest_upsampling = config.get('use_nearest_upsampling', True)
  is_training_bn = config.get('is_training_bn', False)
  num_anchors = len(config.aspect_ratios) * config.num_scales

  # create feature pyramid networks
  feats = resnet_fpn(features, min_level, max_level, resnet_depth,
                     is_training_bn, use_nearest_upsampling)
  logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # add class net and box net in RetinaNet. The class net and the box net are
  # shared among all the levels.
  with tf.variable_scope('retinanet'):
    class_outputs = {}
    box_outputs = {}
    with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
      for level in range(min_level, max_level + 1):
        class_outputs[level] = class_net(feats[level], level, num_classes,
                                         num_anchors, is_training_bn)
    with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
      for level in range(min_level, max_level + 1):
        box_outputs[level] = box_net(feats[level], level, num_anchors,
                                     is_training_bn)
  logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  return class_outputs, box_outputs

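# A minimal usage sketch for retinanet() above in the same TF1 graph style;
# the 640x640 resolution, batch size of 8, and helper name are illustrative.
def _example_retinanet():
  images = tf.placeholder(tf.float32, [8, 640, 640, 3], name='images')
  class_outputs, box_outputs = retinanet(images, model_name='retinanet-50')
  # Both outputs are dicts keyed by pyramid level (min_level..max_level).
  return class_outputs, box_outputs
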
def build_model(self,
                inputs: tf.Tensor,
                is_training: bool = False) -> List[tf.Tensor]:
  """Build model with inputs and labels and print out model stats."""
  tf.logging.info('start building model')
  model_arch = det_model_fn.get_model_arch(self.model_name)
  cls_outputs, box_outputs = model_arch(
      inputs,
      model_name=self.model_name,
      is_training_bn=is_training,
      use_bfloat16=False,
      **self.model_overrides)
  print('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))
  all_outputs = list(cls_outputs.values()) + list(box_outputs.values())
  return all_outputs

def build_model(self,
                model_name,
                isize,
                is_training=False,
                data_format='channels_last'):
  if isinstance(isize, int):
    isize = (isize, isize)
  if data_format == 'channels_first':
    inputs_shape = [1, 3, isize[0], isize[1]]
  else:
    inputs_shape = [1, isize[0], isize[1], 3]
  inputs = tf.ones(shape=inputs_shape, name='input', dtype=tf.float32)
  efficientdet_arch.efficientdet(
      inputs,
      model_name=model_name,
      is_training_bn=is_training,
      image_size=isize,
      data_format=data_format)
  return utils.num_params_flops(False)

def build_model(self,
                model_name,
                isize=None,
                is_training=False,
                data_format='channels_last'):
  config = hparams_config.get_efficientdet_config(model_name)
  config.image_size = isize or config.image_size
  isize = utils.parse_image_size(config.image_size)
  if data_format == 'channels_first':
    inputs_shape = [1, 3, isize[0], isize[1]]
  else:
    inputs_shape = [1, isize[0], isize[1], 3]
  inputs = tf.ones(shape=inputs_shape, name='input', dtype=tf.float32)
  efficientdet_arch.efficientdet(
      inputs,
      model_name=model_name,
      is_training_bn=is_training,
      image_size=isize,
      data_format=data_format)
  return utils.num_params_flops(False)

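# A minimal sketch of how a test might call the profiling helper above to
# compare params/flops across resolutions; the model name and sizes are
# illustrative, and a fresh graph is assumed per build.
def _example_profile(self):
  for isize in (512, 640):
    tf.reset_default_graph()
    params, flops = self.build_model('efficientdet-d0', isize=isize)
    print('isize={}: params={}, flops={}'.format(isize, params, flops))
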
def build_model(self,
                inputs: tf.Tensor,
                is_training: bool = False) -> List[tf.Tensor]:
  """Build model with inputs and labels and print out model stats."""
  logging.info('start building model')
  cls_outputs, box_outputs = inference.build_model(
      self.model_name,
      inputs,
      is_training_bn=is_training,
      config=self.model_config)
  print('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # Write to tfevent for tensorboard.
  train_writer = tf.summary.FileWriter(self.logdir)
  train_writer.add_graph(tf.get_default_graph())
  train_writer.flush()

  all_outputs = list(cls_outputs.values()) + list(box_outputs.values())
  return all_outputs

def build_model(self, inputs: tf.Tensor,
                is_training: bool) -> List[tf.Tensor]:
  """Build model with inputs and labels and print out model stats."""
  tf.logging.info('start building model')
  if self.model_name.startswith('efficientdet'):
    cls_outputs, box_outputs = efficientdet_arch.efficientdet(
        inputs,
        model_name=self.model_name,
        is_training_bn=is_training,
        use_bfloat16=False)
  elif self.model_name.startswith('retinanet'):
    cls_outputs, box_outputs = retinanet_arch.retinanet(
        inputs,
        model_name=self.model_name,
        is_training_bn=is_training,
        use_bfloat16=False)
  else:
    # Fail loudly instead of hitting an UnboundLocalError below.
    raise ValueError('unsupported model name: {}'.format(self.model_name))
  print('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))
  all_outputs = list(cls_outputs.values()) + list(box_outputs.values())
  return all_outputs

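# A minimal sketch of driving the benchmark helper above with a dummy input;
# the 512x512 shape and the helper name are illustrative and must match the
# chosen model's expected resolution.
def _example_outputs(self):
  inputs = tf.ones([1, 512, 512, 3], dtype=tf.float32)
  all_outputs = self.build_model(inputs, is_training=False)
  # One class tensor and one box tensor per pyramid level.
  print('num output tensors: {}'.format(len(all_outputs)))
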
def model_fn(features, labels, mode, params):
  """The model_fn to be used with TPUEstimator.

  Args:
    features: A dict of `Tensor` of batched images and other features.
    labels: a Tensor or a dict of Tensor representing the batched labels.
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
      `params['batch_size']` is always provided and should be used as the
      effective batch size.

  Returns:
    A `TPUEstimatorSpec` for the model
  """
  logging.info('params=%s', params)
  images = features['image'] if isinstance(features, dict) else features
  labels = labels['label'] if isinstance(labels, dict) else labels
  config = params['config']
  image_size = params['image_size']
  utils.scalar('model/resolution', image_size)

  if config.model.data_format == 'channels_first':
    images = tf.transpose(images, [0, 3, 1, 2])

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  has_moving_average_decay = (config.train.ema_decay > 0)
  if FLAGS.use_tpu and not config.model.bn_type:
    config.model.bn_type = 'tpu_bn'
  # This is essential, if using a keras-derived model.
  tf.keras.backend.set_learning_phase(is_training)

  def build_model(in_images):
    """Build model using the model_name given through the command line."""
    config.model.num_classes = config.data.num_classes
    model = effnetv2_model.EffNetV2Model(config.model.model_name, config.model)
    logits = model(in_images, training=is_training)[0]
    return logits

  pre_num_params, pre_num_flops = utils.num_params_flops(readable_format=True)

  if config.runtime.mixed_precision:
    precision = 'mixed_bfloat16' if FLAGS.use_tpu else 'mixed_float16'
    logits = utils.build_model_with_precision(precision, build_model, images,
                                              is_training)
    logits = tf.cast(logits, tf.float32)
  else:
    logits = build_model(images)

  num_params, num_flops = utils.num_params_flops(readable_format=True)
  num_params = num_params - pre_num_params
  num_flops = (num_flops - pre_num_flops) / params['batch_size']
  logging.info('backbone params/flops = %.4f M / %.4f B', num_params,
               num_flops)
  utils.scalar('model/params', num_params)
  utils.scalar('model/flops', num_flops)

  # Calculate loss, which includes softmax cross entropy and L2 regularization.
  if config.train.loss_type == 'sigmoid':
    cross_entropy = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=tf.cast(labels, dtype=logits.dtype),
        logits=logits,
        label_smoothing=config.train.label_smoothing)
  elif config.train.loss_type == 'custom':
    xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.cast(labels, dtype=logits.dtype), logits=logits)
    cross_entropy = tf.reduce_mean(tf.reduce_sum(xent, axis=-1))
  else:
    if config.data.multiclass:
      logging.info('use multi-class loss: %s', config.data.multiclass)
      labels /= tf.reshape(tf.reduce_sum(labels, axis=1), (-1, 1))
    cross_entropy = tf.losses.softmax_cross_entropy(
        onehot_labels=labels,
        logits=logits,
        label_smoothing=config.train.label_smoothing)

  train_steps = max(config.train.min_steps,
                    config.train.epochs * params['steps_per_epoch'])
  global_step = tf.train.get_global_step()
  weight_decay_inc = config.train.weight_decay_inc * (
      tf.cast(global_step, tf.float32) / tf.cast(train_steps, tf.float32))
  weight_decay = (1 + weight_decay_inc) * config.train.weight_decay
  utils.scalar('train/weight_decay', weight_decay)
  # Add weight decay to the loss for non-batch-normalization variables.
  matcher = re.compile(config.train.weight_decay_exclude)
  l2loss = weight_decay * tf.add_n([
      tf.nn.l2_loss(v)
      for v in tf.trainable_variables()
      if not matcher.match(v.name)
  ])
  loss = cross_entropy + l2loss
  utils.scalar('loss/l2reg', l2loss)
  utils.scalar('loss/xent', cross_entropy)

  if has_moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=config.train.ema_decay, num_updates=global_step)
    ema_vars = utils.get_ema_vars()

  host_call = None
  restore_vars_dict = None
  if is_training:
    # Compute the current epoch and associated learning rate from global_step.
    current_epoch = (
        tf.cast(global_step, tf.float32) / params['steps_per_epoch'])
    utils.scalar('train/epoch', current_epoch)

    scaled_lr = config.train.lr_base * (config.train.batch_size / 256.0)
    scaled_lr_min = config.train.lr_min * (config.train.batch_size / 256.0)
    learning_rate = utils.WarmupLearningRateSchedule(
        scaled_lr,
        steps_per_epoch=params['steps_per_epoch'],
        decay_epochs=config.train.lr_decay_epoch,
        warmup_epochs=config.train.lr_warmup_epoch,
        decay_factor=config.train.lr_decay_factor,
        lr_decay_type=config.train.lr_sched,
        total_steps=train_steps,
        minimal_lr=scaled_lr_min)(global_step)
    utils.scalar('train/lr', learning_rate)
    optimizer = utils.build_optimizer(
        learning_rate, optimizer_name=config.train.optimizer)
    if FLAGS.use_tpu:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # filter trainable variables if needed.
    var_list = tf.trainable_variables()
    if config.train.varsexp:
      vars2 = [v for v in var_list if re.match(config.train.varsexp, v.name)]
      if len(vars2) == len(var_list):
        logging.warning('%s has no match.', config.train.varsexp)
      logging.info('Filter variables: orig=%d, final=%d, delta=%d',
                   len(var_list), len(vars2), len(var_list) - len(vars2))
      var_list = vars2

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if config.train.gclip and is_training:
      logging.info('clip gradients norm by %f', config.train.gclip)
      grads_and_vars = optimizer.compute_gradients(loss, var_list)
      with tf.name_scope('gclip'):
        grads = [gv[0] for gv in grads_and_vars]
        tvars = [gv[1] for gv in grads_and_vars]
        utils.scalar('train/gnorm', tf.linalg.global_norm(grads))
        utils.scalar('train/gnormmax',
                     tf.math.reduce_max([tf.norm(g) for g in grads]))
        # First clip each variable's norm, then clip global norm.
        clip_norm = abs(config.train.gclip)
        clipped_grads = [
            tf.clip_by_norm(g, clip_norm) if g is not None else None
            for g in grads
        ]
        clipped_grads, _ = tf.clip_by_global_norm(clipped_grads, clip_norm)
        grads_and_vars = list(zip(clipped_grads, tvars))

      with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step, var_list=var_list)

    if has_moving_average_decay:
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)

    if not config.runtime.skip_host_call:
      host_call = utils.get_tpu_host_call(global_step, FLAGS.model_dir,
                                          config.runtime.iterations_per_loop)
  else:
    train_op = None
    if has_moving_average_decay:
      # Load moving average variables for eval.
      restore_vars_dict = ema.variables_to_restore(ema_vars)

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(labels, logits):
      """Evaluation metric function.

      Evaluates accuracy. This function is executed on the CPU and should not
      directly reference any Tensors in the rest of the `model_fn`. To pass
      Tensors from the model to the `metric_fn`, provide as part of the
      `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
      for more information. Arguments should match the list of `Tensor`
      objects passed as the second element in the tuple passed to
      `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, num_classes]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
      metrics = {}
      if config.data.multiclass:
        metrics['eval/global_ap'] = tf.metrics.auc(
            labels,
            tf.nn.sigmoid(logits),
            curve='PR',
            num_thresholds=200,
            summation_method='careful_interpolation',
            name='global_ap')

        # Convert labels to set: be careful, tf.metrics.xx_at_k are horrible.
        labels = tf.cast(labels, dtype=tf.int64)
        label_to_repeat = tf.expand_dims(tf.argmax(labels, axis=-1), axis=-1)
        all_labels_set = tf.range(0, labels.shape[-1], dtype=tf.int64)
        all_labels_set = tf.expand_dims(all_labels_set, axis=0)
        labels_set = labels * all_labels_set + (1 - labels) * label_to_repeat

        metrics['eval/precision@1'] = tf.metrics.precision_at_k(
            labels_set, logits, k=1)
        metrics['eval/recall@1'] = tf.metrics.recall_at_k(
            labels_set, logits, k=1)
        metrics['eval/precision@5'] = tf.metrics.precision_at_k(
            labels_set, logits, k=5)
        metrics['eval/recall@5'] = tf.metrics.recall_at_k(
            labels_set, logits, k=5)

      # always add accuracy.
      labels = tf.argmax(labels, axis=1)
      predictions = tf.argmax(logits, axis=1)
      metrics['eval/acc_top1'] = tf.metrics.accuracy(labels, predictions)
      in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
      metrics['eval/acc_top5'] = tf.metrics.mean(in_top_5)
      metrics['model/resolution'] = tf.metrics.mean(image_size)
      metrics['model/flops'] = tf.metrics.mean(num_flops)
      metrics['model/params'] = tf.metrics.mean(num_params)
      return metrics

    eval_metrics = (metric_fn, [labels, logits])

  if has_moving_average_decay and not is_training:

    def scaffold_fn():
      # read ema for eval jobs.
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)

  elif config.train.ft_init_ckpt and is_training:

    def scaffold_fn():
      logging.info('restore variables from %s', config.train.ft_init_ckpt)
      var_map = utils.get_ckpt_var_map(
          ckpt_path=config.train.ft_init_ckpt,
          skip_mismatch=True,
          init_ema=config.train.ft_init_ema)
      tf.train.init_from_checkpoint(config.train.ft_init_ckpt, var_map)
      return tf.train.Scaffold()

  else:
    scaffold_fn = None

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)

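# A minimal sketch of wiring model_fn into a TPUEstimator. run_config and
# train_input_fn are assumed to be defined elsewhere in the training script;
# the 'image_size' and 'steps_per_epoch' values below are illustrative
# (model_fn reads both keys, and TPUEstimator supplies 'batch_size' itself).
def _example_train(run_config, config, train_input_fn):
  est = tf.estimator.tpu.TPUEstimator(
      model_fn=model_fn,
      config=run_config,
      use_tpu=FLAGS.use_tpu,
      train_batch_size=config.train.batch_size,
      params={
          'config': config,
          'image_size': 224,  # illustrative train resolution
          'steps_per_epoch': 1000,  # illustrative
      })
  est.train(input_fn=train_input_fn, max_steps=config.train.min_steps)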