def export(self, output_dir: Text = None, tensorrt: Text = None, tflite: Text = None): """Export a saved model, frozen graph, and potential tflite/tensorrt model. Args: output_dir: the output folder for saved model. tensorrt: If not None, must be {'FP32', 'FP16', 'INT8'}. tflite: If not None, must be {'FP32', 'FP16', 'INT8'}. """ if not self.model: self.build() export_model = ExportModel(self.model) if output_dir: tf.saved_model.save( export_model, output_dir, signatures=export_model.__call__.get_concrete_function( tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.uint8, name='images'))) logging.info('Model saved at %s', output_dir) # also save freeze pb file. graphdef = self.freeze( export_model.__call__.get_concrete_function( tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.uint8, name='images'))) proto_path = tf.io.write_graph(graphdef, output_dir, self.model_name + '_frozen.pb', as_text=False) logging.info('Frozen graph saved at %s', proto_path) if tflite: image_size = utils.parse_image_size(self.params['image_size']) converter = tf.lite.TFLiteConverter.from_concrete_functions([ export_model.tflite_call.get_concrete_function( tf.TensorSpec(shape=[1, *image_size, 3])) ]) if tflite == 'FP32': converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.target_spec.supported_types = [tf.float32] elif tflite == 'FP16': converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.target_spec.supported_types = [tf.float16] elif tflite == 'INT8': num_calibration_steps = 10 def representative_dataset_gen( ): # rewrite this for real data. for _ in range(num_calibration_steps): yield [np.ones((1, *image_size, 3), dtype=np.float32)] converter.representative_dataset = representative_dataset_gen converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.inference_input_type = tf.uint8 converter.inference_output_type = tf.uint8 converter.target_spec.supported_ops = [ tf.lite.OpsSet.TFLITE_BUILTINS_INT8 ] else: raise ValueError('tflite must be one of {FP32, FP16, INT8}.') tflite_path = os.path.join(output_dir, tflite.lower() + '.tflite') tflite_model = converter.convert() tf.io.gfile.GFile(tflite_path, 'wb').write(tflite_model) logging.info('TFLite is saved at %s', tflite_path) if tensorrt: trt_path = os.path.join(output_dir, 'tensorrt_' + tensorrt.lower()) conversion_params = tf.experimental.tensorrt.ConversionParams( max_workspace_size_bytes=(2 << 20), maximum_cached_engines=1, precision_mode=tensorrt.upper()) converter = tf.experimental.tensorrt.Converter( output_dir, conversion_params=conversion_params) converter.convert() converter.save(trt_path) logging.info('TensorRT model is saved at %s', trt_path)
def main(argv): del argv # Unused. if FLAGS.use_tpu: tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) tpu_grpc_url = tpu_cluster_resolver.get_master() tf.Session.reset(tpu_grpc_url) else: tpu_cluster_resolver = None # Check data path if FLAGS.mode in ( 'train', 'train_and_eval') and FLAGS.training_file_pattern is None: raise RuntimeError( 'You must specify --training_file_pattern for training.') if FLAGS.mode in ('eval', 'train_and_eval'): if FLAGS.validation_file_pattern is None: raise RuntimeError('You must specify --validation_file_pattern ' 'for evaluation.') # Parse and override hparams config = hparams_config.get_detection_config(FLAGS.model_name) config.override(FLAGS.hparams) if FLAGS.num_epochs: # NOTE: remove this flag after updating all docs. config.num_epochs = FLAGS.num_epochs # Parse image size in case it is in string format. config.image_size = utils.parse_image_size(config.image_size) # The following is for spatial partitioning. `features` has one tensor while # `labels` had 4 + (`max_level` - `min_level` + 1) * 2 tensors. The input # partition is performed on `features` and all partitionable tensors of # `labels`, see the partition logic below. # In the TPUEstimator context, the meaning of `shard` and `replica` is the # same; follwing the API, here has mixed use of both. if FLAGS.use_spatial_partition: # Checks input_partition_dims agrees with num_cores_per_replica. if FLAGS.num_cores_per_replica != np.prod(FLAGS.input_partition_dims): raise RuntimeError( '--num_cores_per_replica must be a product of array' 'elements in --input_partition_dims.') labels_partition_dims = { 'mean_num_positives': None, 'source_ids': None, 'groundtruth_data': None, 'image_scales': None, } # The Input Partition Logic: We partition only the partition-able tensors. # Spatial partition requires that the to-be-partitioned tensors must have a # dimension that is a multiple of `partition_dims`. Depending on the # `partition_dims` and the `image_size` and the `max_level` in config, some # high-level anchor labels (i.e., `cls_targets` and `box_targets`) cannot # be partitioned. For example, when `partition_dims` is [1, 4, 2, 1], image # size is 1536, `max_level` is 9, `cls_targets_8` has a shape of # [batch_size, 6, 6, 9], which cannot be partitioned (6 % 4 != 0). In this # case, the level-8 and level-9 target tensors are not partition-able, and # the highest partition-able level is 7. feat_sizes = utils.get_feat_sizes(config.get('image_size'), config.get('max_level')) for level in range(config.get('min_level'), config.get('max_level') + 1): def _can_partition(spatial_dim): partitionable_index = np.where( spatial_dim % np.array(FLAGS.input_partition_dims) == 0) return len(partitionable_index[0]) == len( FLAGS.input_partition_dims) spatial_dim = feat_sizes[level] if _can_partition(spatial_dim['height']) and _can_partition( spatial_dim['width']): labels_partition_dims['box_targets_%d' % level] = FLAGS.input_partition_dims labels_partition_dims['cls_targets_%d' % level] = FLAGS.input_partition_dims else: labels_partition_dims['box_targets_%d' % level] = None labels_partition_dims['cls_targets_%d' % level] = None num_cores_per_replica = FLAGS.num_cores_per_replica input_partition_dims = [ FLAGS.input_partition_dims, labels_partition_dims ] num_shards = FLAGS.num_cores // num_cores_per_replica else: num_cores_per_replica = None input_partition_dims = None num_shards = FLAGS.num_cores params = dict( config.as_dict(), model_name=FLAGS.model_name, iterations_per_loop=FLAGS.iterations_per_loop, model_dir=FLAGS.model_dir, num_shards=num_shards, num_examples_per_epoch=FLAGS.num_examples_per_epoch, use_tpu=FLAGS.use_tpu, backbone_ckpt=FLAGS.backbone_ckpt, ckpt=FLAGS.ckpt, val_json_file=FLAGS.val_json_file, testdev_dir=FLAGS.testdev_dir, mode=FLAGS.mode, ) config_proto = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) if FLAGS.use_xla and not FLAGS.use_tpu: config_proto.graph_options.optimizer_options.global_jit_level = ( tf.OptimizerOptions.ON_1) tpu_config = tf.estimator.tpu.TPUConfig( FLAGS.iterations_per_loop, num_shards=num_shards, num_cores_per_replica=num_cores_per_replica, input_partition_dims=input_partition_dims, per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig. PER_HOST_V2) run_config = tf.estimator.tpu.RunConfig( cluster=tpu_cluster_resolver, evaluation_master=FLAGS.eval_master, model_dir=FLAGS.model_dir, log_step_count_steps=FLAGS.iterations_per_loop, session_config=config_proto, tpu_config=tpu_config, tf_random_seed=FLAGS.tf_random_seed, ) model_fn_instance = det_model_fn.get_model_fn(FLAGS.model_name) # TPU Estimator logging.info(params) if FLAGS.mode == 'train': train_estimator = tf.estimator.tpu.TPUEstimator( model_fn=model_fn_instance, use_tpu=FLAGS.use_tpu, train_batch_size=FLAGS.train_batch_size, config=run_config, params=params) train_estimator.train( input_fn=dataloader.InputReader(FLAGS.training_file_pattern, is_training=True, use_fake_data=FLAGS.use_fake_data), max_steps=int((config.num_epochs * FLAGS.num_examples_per_epoch) / FLAGS.train_batch_size)) if FLAGS.eval_after_training: # Run evaluation after training finishes. eval_params = dict( params, use_tpu=FLAGS.use_tpu, input_rand_hflip=False, is_training_bn=False, precision=None, ) eval_estimator = tf.estimator.tpu.TPUEstimator( model_fn=model_fn_instance, use_tpu=FLAGS.use_tpu, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, config=run_config, params=eval_params) eval_results = eval_estimator.evaluate( input_fn=dataloader.InputReader(FLAGS.validation_file_pattern, is_training=False), steps=FLAGS.eval_samples // FLAGS.eval_batch_size) logging.info('Eval results: %s', eval_results) ckpt = tf.train.latest_checkpoint(FLAGS.model_dir) utils.archive_ckpt(eval_results, eval_results['AP'], ckpt) elif FLAGS.mode == 'eval': # Eval only runs on CPU or GPU host with batch_size = 1. # Override the default options: disable randomization in the input pipeline # and don't run on the TPU. eval_params = dict( params, use_tpu=FLAGS.use_tpu, input_rand_hflip=False, is_training_bn=False, precision=None, ) eval_estimator = tf.estimator.tpu.TPUEstimator( model_fn=model_fn_instance, use_tpu=FLAGS.use_tpu, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, config=run_config, params=eval_params) def terminate_eval(): logging.info('Terminating eval after %d seconds of no checkpoints', FLAGS.eval_timeout) return True # Run evaluation when there's a new checkpoint for ckpt in tf.train.checkpoints_iterator( FLAGS.model_dir, min_interval_secs=FLAGS.min_eval_interval, timeout=FLAGS.eval_timeout, timeout_fn=terminate_eval): logging.info('Starting to evaluate.') try: eval_results = eval_estimator.evaluate( input_fn=dataloader.InputReader( FLAGS.validation_file_pattern, is_training=False), steps=FLAGS.eval_samples // FLAGS.eval_batch_size) logging.info('Eval results: %s', eval_results) # Terminate eval job when final checkpoint is reached. try: current_step = int(os.path.basename(ckpt).split('-')[1]) except IndexError: logging.info('%s has no global step info: stop!', ckpt) break utils.archive_ckpt(eval_results, eval_results['AP'], ckpt) total_step = int( (config.num_epochs * FLAGS.num_examples_per_epoch) / FLAGS.train_batch_size) if current_step >= total_step: logging.info('Evaluation finished after training step %d', current_step) break except tf.errors.NotFoundError: # Since the coordinator is on a different job than the TPU worker, # sometimes the TPU worker does not finish initializing until long after # the CPU job tells it to start evaluating. In this case, the checkpoint # file could have been deleted already. logging.info( 'Checkpoint %s no longer exists, skipping checkpoint', ckpt) elif FLAGS.mode == 'train_and_eval': for cycle in range(config.num_epochs): logging.info('Starting training cycle, epoch: %d.', cycle) train_estimator = tf.estimator.tpu.TPUEstimator( model_fn=model_fn_instance, use_tpu=FLAGS.use_tpu, train_batch_size=FLAGS.train_batch_size, config=run_config, params=params) train_estimator.train(input_fn=dataloader.InputReader( FLAGS.training_file_pattern, is_training=True, use_fake_data=FLAGS.use_fake_data), steps=int(FLAGS.num_examples_per_epoch / FLAGS.train_batch_size)) logging.info('Starting evaluation cycle, epoch: %d.', cycle) # Run evaluation after every epoch. eval_params = dict( params, use_tpu=FLAGS.use_tpu, input_rand_hflip=False, is_training_bn=False, ) eval_estimator = tf.estimator.tpu.TPUEstimator( model_fn=model_fn_instance, use_tpu=FLAGS.use_tpu, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, config=run_config, params=eval_params) eval_results = eval_estimator.evaluate( input_fn=dataloader.InputReader(FLAGS.validation_file_pattern, is_training=False), steps=FLAGS.eval_samples // FLAGS.eval_batch_size) logging.info('Evaluation results: %s', eval_results) ckpt = tf.train.latest_checkpoint(FLAGS.model_dir) utils.archive_ckpt(eval_results, eval_results['AP'], ckpt) else: logging.info('Mode not found.')
def main(_): config = hparams_config.get_efficientdet_config(FLAGS.model_name) config.override(FLAGS.hparams) config.batch_size = FLAGS.batch_size config.val_json_file = FLAGS.val_json_file config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS config.drop_remainder = False # eval all examples w/o drop. config.image_size = utils.parse_image_size(config['image_size']) if FLAGS.strategy == 'tpu': tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) tf.config.experimental_connect_to_cluster(tpu_cluster_resolver) tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver) ds_strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver) logging.info('All devices: %s', tf.config.list_logical_devices('TPU')) elif FLAGS.strategy == 'gpus': ds_strategy = tf.distribute.MirroredStrategy() logging.info('All devices: %s', tf.config.list_physical_devices('GPU')) else: if tf.config.list_physical_devices('GPU'): ds_strategy = tf.distribute.OneDeviceStrategy('device:GPU:0') else: ds_strategy = tf.distribute.OneDeviceStrategy('device:CPU:0') with ds_strategy.scope(): # Network model = efficientdet_keras.EfficientDetNet(config=config) model.build((config.batch_size, *config.image_size, 3)) model.load_weights(tf.train.latest_checkpoint(FLAGS.model_dir)) @tf.function def f(images, labels): cls_outputs, box_outputs = model(images, training=False) return postprocess.generate_detections(config, cls_outputs, box_outputs, labels['image_scales'], labels['source_ids']) # dataset ds = dataloader.InputReader( FLAGS.val_file_pattern, is_training=False, max_instances_per_image=config.max_instances_per_image)(config) if FLAGS.eval_samples: ds = ds.take(FLAGS.eval_samples // config.batch_size) # create a evaluator for AP calculation. label_map = label_util.get_label_map(config.label_map) evaluator = coco_metric.EvaluationMetric(filename=config.val_json_file, label_map=label_map) # evaluate all images. eval_samples = FLAGS.eval_samples or 5000 pbar = tf.keras.utils.Progbar(eval_samples // config.batch_size) for i, (images, labels) in enumerate(ds): detections = f(images, labels) evaluator.update_state( labels['groundtruth_data'].numpy(), postprocess.transform_detections(detections).numpy()) pbar.update(i) # compute the final eval results. metrics = evaluator.result() metric_dict = {} for i, name in enumerate(evaluator.metric_names): metric_dict[name] = metrics[i] if label_map: for i, cid in enumerate(sorted(label_map.keys())): name = 'AP_/%s' % label_map[cid] metric_dict[name] = metrics[i + len(evaluator.metric_names)] print(FLAGS.model_name, metric_dict)
def export(self, output_dir: Text, tflite_path: Text = None, tensorrt: Text = None): """Export a saved model, frozen graph, and potential tflite/tensorrt model. Args: output_dir: the output folder for saved model. tflite_path: the path for saved tflite file. tensorrt: If not None, must be {'FP32', 'FP16', 'INT8'}. """ signitures = self.signitures signature_def_map = { 'serving_default': tf.saved_model.predict_signature_def( {signitures['image_arrays'].name: signitures['image_arrays']}, {signitures['prediction'].name: signitures['prediction']}), } b = tf.saved_model.Builder(output_dir) b.add_meta_graph_and_variables(self.sess, tags=['serve'], signature_def_map=signature_def_map, assets_collection=tf.get_collection( tf.GraphKeys.ASSET_FILEPATHS), clear_devices=True) b.save() logging.info('Model saved at %s', output_dir) # also save freeze pb file. graphdef = self.freeze() pb_path = os.path.join(output_dir, self.model_name + '_frozen.pb') tf.io.gfile.GFile(pb_path, 'wb').write(graphdef.SerializeToString()) logging.info('Frozen graph saved at %s', pb_path) if tflite_path: height, width = utils.parse_image_size(self.params['image_size']) input_name = signitures['image_arrays'].op.name input_shapes = {input_name: [None, height, width, 3]} converter = tf.lite.TFLiteConverter.from_saved_model( output_dir, input_arrays=[input_name], input_shapes=input_shapes, output_arrays=[signitures['prediction'].op.name]) converter.target_spec.supported_ops = [ tf.lite.OpsSet.TFLITE_BUILTINS ] tflite_model = converter.convert() tf.io.gfile.GFile(tflite_path, 'wb').write(tflite_model) logging.info('TFLite is saved at %s', tflite_path) if tensorrt: from tensorflow.python.compiler.tensorrt import trt # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions( allow_growth=True)) trt_path = os.path.join(output_dir, 'tensorrt_' + tensorrt.lower()) trt.create_inference_graph(None, None, precision_mode=tensorrt, input_saved_model_dir=output_dir, output_saved_model_dir=trt_path, session_config=sess_config) logging.info('TensorRT model is saved at %s', trt_path)
def main(_): # Parse and override hparams config = hparams_config.get_detection_config(FLAGS.model_name) config.override(FLAGS.hparams) if FLAGS.num_epochs: # NOTE: remove this flag after updating all docs. config.num_epochs = FLAGS.num_epochs # Parse image size in case it is in string format. config.image_size = utils.parse_image_size(config.image_size) if FLAGS.use_xla and FLAGS.strategy != 'tpu': tf.config.optimizer.set_jit(True) for gpu in tf.config.list_physical_devices('GPU'): tf.config.experimental.set_memory_growth(gpu, True) if FLAGS.debug: tf.config.run_functions_eagerly(True) tf.debugging.set_log_device_placement(True) os.environ['TF_DETERMINISTIC_OPS'] = '1' tf.random.set_seed(FLAGS.tf_random_seed) logging.set_verbosity(logging.DEBUG) if FLAGS.strategy == 'tpu': tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) tf.config.experimental_connect_to_cluster(tpu_cluster_resolver) tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver) ds_strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver) logging.info('All devices: %s', tf.config.list_logical_devices('TPU')) elif FLAGS.strategy == 'gpus': ds_strategy = tf.distribute.MirroredStrategy() logging.info('All devices: %s', tf.config.list_physical_devices('GPU')) else: if tf.config.list_physical_devices('GPU'): ds_strategy = tf.distribute.OneDeviceStrategy('device:GPU:0') else: ds_strategy = tf.distribute.OneDeviceStrategy('device:CPU:0') steps_per_epoch = FLAGS.num_examples_per_epoch // FLAGS.batch_size params = dict(profile=FLAGS.profile, model_name=FLAGS.model_name, steps_per_execution=FLAGS.steps_per_execution, model_dir=FLAGS.model_dir, steps_per_epoch=steps_per_epoch, strategy=FLAGS.strategy, batch_size=FLAGS.batch_size, tf_random_seed=FLAGS.tf_random_seed, debug=FLAGS.debug, val_json_file=FLAGS.val_json_file, eval_samples=FLAGS.eval_samples, num_shards=ds_strategy.num_replicas_in_sync) config.override(params, True) # set mixed precision policy by keras api. precision = utils.get_precision(config.strategy, config.mixed_precision) policy = tf.keras.mixed_precision.Policy(precision) tf.keras.mixed_precision.set_global_policy(policy) def get_dataset(is_training, config): file_pattern = (FLAGS.train_file_pattern if is_training else FLAGS.val_file_pattern) if not file_pattern: raise ValueError('No matching files.') return dataloader.InputReader( file_pattern, is_training=is_training, use_fake_data=FLAGS.use_fake_data, max_instances_per_image=config.max_instances_per_image, debug=FLAGS.debug)(config.as_dict()) with ds_strategy.scope(): if config.model_optimizations: tfmot.set_config(config.model_optimizations.as_dict()) if FLAGS.hub_module_url: model = train_lib.EfficientDetNetTrainHub( config=config, hub_module_url=FLAGS.hub_module_url) else: model = train_lib.EfficientDetNetTrain(config=config) model = setup_model(model, config) if FLAGS.pretrained_ckpt and not FLAGS.hub_module_url: ckpt_path = tf.train.latest_checkpoint(FLAGS.pretrained_ckpt) util_keras.restore_ckpt(model, ckpt_path, config.moving_average_decay) init_experimental(config) if 'train' in FLAGS.mode: val_dataset = get_dataset(False, config) if 'eval' in FLAGS.mode else None model.fit( get_dataset(True, config), epochs=config.num_epochs, steps_per_epoch=steps_per_epoch, callbacks=train_lib.get_callbacks(config.as_dict(), val_dataset), validation_data=val_dataset, validation_steps=(FLAGS.eval_samples // FLAGS.batch_size)) else: # Continuous eval. for ckpt in tf.train.checkpoints_iterator(FLAGS.model_dir, min_interval_secs=180): logging.info('Starting to evaluate.') # Terminate eval job when final checkpoint is reached. try: current_epoch = int(os.path.basename(ckpt).split('-')[1]) except IndexError: current_epoch = 0 val_dataset = get_dataset(False, config) logging.info('start loading model.') model.load_weights(tf.train.latest_checkpoint(FLAGS.model_dir)) logging.info('finish loading model.') coco_eval = train_lib.COCOCallback(val_dataset, 1) coco_eval.set_model(model) eval_results = coco_eval.on_epoch_end(current_epoch) logging.info('eval results for %s: %s', ckpt, eval_results) try: utils.archive_ckpt(eval_results, eval_results['AP'], ckpt) except tf.errors.NotFoundError: # Checkpoint might be not already deleted by the time eval finished. logging.info('Checkpoint %s no longer exists, skipping.', ckpt) if current_epoch >= config.num_epochs or not current_epoch: logging.info('Eval epoch %d / %d', current_epoch, config.num_epochs) break
def _generate_detections_tf(cls_outputs, box_outputs, anchor_boxes, indices, classes, image_id, image_scale, image_size, min_score_thresh=MIN_SCORE_THRESH, max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE, soft_nms_sigma=0.0, iou_threshold=0.5, use_native_nms=True): """Generates detections with model outputs and anchors. Args: cls_outputs: a numpy array with shape [N, 1], which has the highest class scores on all feature levels. The N is the number of selected top-K total anchors on all levels. (k being MAX_DETECTION_POINTS) box_outputs: a numpy array with shape [N, 4], which stacks box regression outputs on all feature levels. The N is the number of selected top-k total anchors on all levels. (k being MAX_DETECTION_POINTS) anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all feature levels. The N is the number of selected top-k total anchors on all levels. indices: a numpy array with shape [N], which is the indices from top-k selection. classes: a numpy array with shape [N], which represents the class prediction on all selected anchors from top-k selection. image_id: an integer number to specify the image id. image_scale: a float tensor representing the scale between original image and input image for the detector. It is used to rescale detections for evaluating with the original groundtruth annotations. image_size: a tuple (height, width) or an integer for image size. min_score_thresh: A float representing the threshold for deciding when to remove boxes based on score. max_boxes_to_draw: Max number of boxes to draw. soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter; See Bodla et al, https://arxiv.org/abs/1704.04503). When `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) NMS. iou_threshold: A float representing the threshold for deciding whether boxes overlap too much with respect to IOU. use_native_nms: a bool that indicates whether to use native nms. Returns: detections: detection results in a tensor with each row representing [image_id, ymin, xmin, ymax, xmax, score, class] """ if not image_size: raise ValueError( 'tf version generate_detection needs non-empty image_size') logging.info('Using tf version of post-processing.') anchor_boxes = tf.gather(anchor_boxes, indices) scores = tf.math.sigmoid(cls_outputs) # apply bounding box regression to anchors boxes = decode_box_outputs_tf(box_outputs, anchor_boxes) if use_native_nms: logging.info('Using native nms.') top_detection_idx, scores = tf.image.non_max_suppression_with_scores( boxes, scores, max_boxes_to_draw, iou_threshold=iou_threshold, score_threshold=min_score_thresh, soft_nms_sigma=soft_nms_sigma) boxes = tf.gather(boxes, top_detection_idx) else: logging.info('Using customized nms.') scores = tf.expand_dims(scores, axis=1) all_detections = tf.concat([boxes, scores], axis=1) top_detection_idx = nms_tf(all_detections, iou_threshold) detections = tf.gather(all_detections, top_detection_idx) scores = detections[:, 4] boxes = detections[:, :4] image_size = utils.parse_image_size(image_size) detections = tf.stack([ tf.cast(tf.tile(image_id, tf.shape(top_detection_idx)), tf.float64), tf.clip_by_value(boxes[:, 0], 0, image_size[0]) * image_scale, tf.clip_by_value(boxes[:, 1], 0, image_size[1]) * image_scale, tf.clip_by_value(boxes[:, 2], 0, image_size[0]) * image_scale, tf.clip_by_value(boxes[:, 3], 0, image_size[1]) * image_scale, scores, tf.cast(tf.gather(classes, top_detection_idx) + 1, tf.float64) ], axis=1) return detections
def main(_): # Parse and override hparams config = hparams_config.get_detection_config(FLAGS.model_name) config.override(FLAGS.hparams) if FLAGS.num_epochs: # NOTE: remove this flag after updating all docs. config.num_epochs = FLAGS.num_epochs # Parse image size in case it is in string format. config.image_size = utils.parse_image_size(config.image_size) if FLAGS.use_xla and FLAGS.strategy != 'tpu': tf.config.optimizer.set_jit(True) for gpu in tf.config.list_physical_devices('GPU'): tf.config.experimental.set_memory_growth(gpu, True) if FLAGS.debug: tf.config.experimental_run_functions_eagerly(True) tf.debugging.set_log_device_placement(True) tf.random.set_seed(111111) logging.set_verbosity(logging.DEBUG) if FLAGS.strategy == 'tpu': tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) tf.config.experimental_connect_to_cluster(tpu_cluster_resolver) tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver) ds_strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver) logging.info('All devices: %s', tf.config.list_logical_devices('TPU')) elif FLAGS.strategy == 'gpus': ds_strategy = tf.distribute.MirroredStrategy() logging.info('All devices: %s', tf.config.list_physical_devices('GPU')) else: if tf.config.list_physical_devices('GPU'): ds_strategy = tf.distribute.OneDeviceStrategy('device:GPU:0') else: ds_strategy = tf.distribute.OneDeviceStrategy('device:CPU:0') # Check data path if FLAGS.mode in ( 'train', 'train_and_eval') and FLAGS.training_file_pattern is None: raise RuntimeError( 'You must specify --training_file_pattern for training.') if FLAGS.mode in ('eval', 'train_and_eval'): if FLAGS.validation_file_pattern is None: raise RuntimeError('You must specify --validation_file_pattern ' 'for evaluation.') params = dict(config.as_dict(), model_name=FLAGS.model_name, iterations_per_loop=FLAGS.iterations_per_loop, model_dir=FLAGS.model_dir, num_examples_per_epoch=FLAGS.num_examples_per_epoch, strategy=FLAGS.strategy, batch_size=FLAGS.batch_size // ds_strategy.num_replicas_in_sync, num_shards=ds_strategy.num_replicas_in_sync, val_json_file=FLAGS.val_json_file, testdev_dir=FLAGS.testdev_dir, mode=FLAGS.mode) # set mixed precision policy by keras api. precision = utils.get_precision(params['strategy'], params['mixed_precision']) policy = tf.keras.mixed_precision.experimental.Policy(precision) tf.keras.mixed_precision.experimental.set_policy(policy) def get_dataset(is_training, params): file_pattern = (FLAGS.training_file_pattern if is_training else FLAGS.validation_file_pattern) return dataloader.InputReader( file_pattern, is_training=is_training, use_fake_data=FLAGS.use_fake_data, max_instances_per_image=config.max_instances_per_image)(params) with ds_strategy.scope(): model = train_lib.EfficientDetNetTrain(params['model_name'], config) height, width = utils.parse_image_size(params['image_size']) model.build((params['batch_size'], height, width, 3)) model.compile( optimizer=train_lib.get_optimizer(params), loss={ 'box_loss': train_lib.BoxLoss(params['delta'], reduction=tf.keras.losses.Reduction.NONE), 'box_iou_loss': train_lib.BoxIouLoss(params['iou_loss_type'], params['min_level'], params['max_level'], params['num_scales'], params['aspect_ratios'], params['anchor_scale'], params['image_size'], reduction=tf.keras.losses.Reduction.NONE), 'class_loss': train_lib.FocalLoss(params['alpha'], params['gamma'], label_smoothing=params['label_smoothing'], reduction=tf.keras.losses.Reduction.NONE) }) ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir) if ckpt_path: model.load_weights(ckpt_path) model.freeze_vars(params['var_freeze_expr']) model.fit(get_dataset(True, params=params), epochs=params['num_epochs'], steps_per_epoch=FLAGS.num_examples_per_epoch, callbacks=train_lib.get_callbacks(params, FLAGS.profile), validation_data=get_dataset(False, params=params), validation_steps=FLAGS.eval_samples) model.save_weights(os.path.join(FLAGS.model_dir, 'model'))
def main(_): if FLAGS.strategy == 'tpu': tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) tpu_grpc_url = tpu_cluster_resolver.get_master() tf.Session.reset(tpu_grpc_url) else: tpu_cluster_resolver = None # Check data path if FLAGS.mode in ('train', 'train_and_eval'): if FLAGS.train_file_pattern is None: raise RuntimeError('Must specify --train_file_pattern for train.') if FLAGS.mode in ('eval', 'train_and_eval'): if FLAGS.val_file_pattern is None: raise RuntimeError('Must specify --val_file_pattern for eval.') # Parse and override hparams config = hparams_config.get_detection_config(FLAGS.model_name) config.override(FLAGS.hparams) if FLAGS.num_epochs: # NOTE: remove this flag after updating all docs. config.num_epochs = FLAGS.num_epochs # Parse image size in case it is in string format. config.image_size = utils.parse_image_size(config.image_size) # The following is for spatial partitioning. `features` has one tensor while # `labels` had 4 + (`max_level` - `min_level` + 1) * 2 tensors. The input # partition is performed on `features` and all partitionable tensors of # `labels`, see the partition logic below. # In the TPUEstimator context, the meaning of `shard` and `replica` is the # same; follwing the API, here has mixed use of both. if FLAGS.use_spatial_partition: # Checks input_partition_dims agrees with num_cores_per_replica. if FLAGS.num_cores_per_replica != np.prod(FLAGS.input_partition_dims): raise RuntimeError( '--num_cores_per_replica must be a product of array' 'elements in --input_partition_dims.') labels_partition_dims = { 'mean_num_positives': None, 'source_ids': None, 'groundtruth_data': None, 'image_scales': None, 'image_masks': None, } # The Input Partition Logic: We partition only the partition-able tensors. feat_sizes = utils.get_feat_sizes(config.get('image_size'), config.get('max_level')) for level in range(config.get('min_level'), config.get('max_level') + 1): def _can_partition(spatial_dim): partitionable_index = np.where( spatial_dim % np.array(FLAGS.input_partition_dims) == 0) return len(partitionable_index[0]) == len( FLAGS.input_partition_dims) spatial_dim = feat_sizes[level] if _can_partition(spatial_dim['height']) and _can_partition( spatial_dim['width']): labels_partition_dims['box_targets_%d' % level] = FLAGS.input_partition_dims labels_partition_dims['cls_targets_%d' % level] = FLAGS.input_partition_dims else: labels_partition_dims['box_targets_%d' % level] = None labels_partition_dims['cls_targets_%d' % level] = None num_cores_per_replica = FLAGS.num_cores_per_replica input_partition_dims = [ FLAGS.input_partition_dims, labels_partition_dims ] num_shards = FLAGS.num_cores // num_cores_per_replica else: num_cores_per_replica = None input_partition_dims = None num_shards = FLAGS.num_cores params = dict(config.as_dict(), model_name=FLAGS.model_name, iterations_per_loop=FLAGS.iterations_per_loop, model_dir=FLAGS.model_dir, num_shards=num_shards, num_examples_per_epoch=FLAGS.num_examples_per_epoch, strategy=FLAGS.strategy, backbone_ckpt=FLAGS.backbone_ckpt, ckpt=FLAGS.ckpt, val_json_file=FLAGS.val_json_file, testdev_dir=FLAGS.testdev_dir, profile=FLAGS.profile, mode=FLAGS.mode) config_proto = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) if FLAGS.strategy != 'tpu': if FLAGS.use_xla: config_proto.graph_options.optimizer_options.global_jit_level = ( tf.OptimizerOptions.ON_1) config_proto.gpu_options.allow_growth = True model_dir = FLAGS.model_dir model_fn_instance = det_model_fn.get_model_fn(FLAGS.model_name) max_instances_per_image = config.max_instances_per_image if FLAGS.eval_samples: eval_steps = int((FLAGS.eval_samples + FLAGS.eval_batch_size - 1) // FLAGS.eval_batch_size) else: eval_steps = None total_examples = int(config.num_epochs * FLAGS.num_examples_per_epoch) train_steps = total_examples // FLAGS.train_batch_size logging.info(params) if not tf.io.gfile.exists(model_dir): tf.io.gfile.makedirs(model_dir) config_file = os.path.join(model_dir, 'config.yaml') if not tf.io.gfile.exists(config_file): tf.io.gfile.GFile(config_file, 'w').write(str(config)) train_input_fn = dataloader.InputReader( FLAGS.train_file_pattern, is_training=True, use_fake_data=FLAGS.use_fake_data, max_instances_per_image=max_instances_per_image) eval_input_fn = dataloader.InputReader( FLAGS.val_file_pattern, is_training=False, use_fake_data=FLAGS.use_fake_data, max_instances_per_image=max_instances_per_image) if FLAGS.strategy == 'tpu': tpu_config = tf.estimator.tpu.TPUConfig( FLAGS.iterations_per_loop if FLAGS.strategy == 'tpu' else 1, num_cores_per_replica=num_cores_per_replica, input_partition_dims=input_partition_dims, per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig. PER_HOST_V2) run_config = tf.estimator.tpu.RunConfig( cluster=tpu_cluster_resolver, model_dir=model_dir, log_step_count_steps=FLAGS.iterations_per_loop, session_config=config_proto, tpu_config=tpu_config, save_checkpoints_steps=FLAGS.save_checkpoints_steps, tf_random_seed=FLAGS.tf_random_seed, ) # TPUEstimator can do both train and eval. train_est = tf.estimator.tpu.TPUEstimator( model_fn=model_fn_instance, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, config=run_config, params=params) eval_est = train_est else: strategy = None if FLAGS.strategy == 'gpus': strategy = tf.distribute.MirroredStrategy() run_config = tf.estimator.RunConfig( model_dir=model_dir, train_distribute=strategy, log_step_count_steps=FLAGS.iterations_per_loop, session_config=config_proto, save_checkpoints_steps=FLAGS.save_checkpoints_steps, tf_random_seed=FLAGS.tf_random_seed, ) def get_estimator(global_batch_size): params['num_shards'] = getattr(strategy, 'num_replicas_in_sync', 1) params['batch_size'] = global_batch_size // params['num_shards'] return tf.estimator.Estimator(model_fn=model_fn_instance, config=run_config, params=params) # train and eval need different estimator due to different batch size. train_est = get_estimator(FLAGS.train_batch_size) eval_est = get_estimator(FLAGS.eval_batch_size) # start train/eval flow. if FLAGS.mode == 'train': train_est.train(input_fn=train_input_fn, max_steps=train_steps) if FLAGS.eval_after_train: eval_est.evaluate(input_fn=eval_input_fn, steps=eval_steps) elif FLAGS.mode == 'eval': # Run evaluation when there's a new checkpoint for ckpt in tf.train.checkpoints_iterator( FLAGS.model_dir, min_interval_secs=FLAGS.min_eval_interval, timeout=FLAGS.eval_timeout): logging.info('Starting to evaluate.') try: eval_results = eval_est.evaluate(eval_input_fn, steps=eval_steps) # Terminate eval job when final checkpoint is reached. try: current_step = int(os.path.basename(ckpt).split('-')[1]) except IndexError: logging.info('%s has no global step info: stop!', ckpt) break utils.archive_ckpt(eval_results, eval_results['AP'], ckpt) if current_step >= train_steps: logging.info('Eval finished step %d/%d', current_step, train_steps) break except tf.errors.NotFoundError: # Checkpoint might be not already deleted by the time eval finished. # We simply skip ssuch case. logging.info('Checkpoint %s no longer exists, skipping.', ckpt) elif FLAGS.mode == 'train_and_eval': ckpt = tf.train.latest_checkpoint(FLAGS.model_dir) try: step = int(os.path.basename(ckpt).split('-')[1]) current_epoch = (step * FLAGS.train_batch_size // FLAGS.num_examples_per_epoch) logging.info('found ckpt at step %d (epoch %d)', step, current_epoch) except (IndexError, TypeError): logging.info('Folder %s has no ckpt with valid step.', FLAGS.model_dir) current_epoch = 0 def run_train_and_eval(e): print('\n =====> Starting training, epoch: %d.' % e) train_est.train(input_fn=train_input_fn, max_steps=e * FLAGS.num_examples_per_epoch // FLAGS.train_batch_size) print('\n =====> Starting evaluation, epoch: %d.' % e) eval_results = eval_est.evaluate(input_fn=eval_input_fn, steps=eval_steps) ckpt = tf.train.latest_checkpoint(FLAGS.model_dir) utils.archive_ckpt(eval_results, eval_results['AP'], ckpt) epochs_per_cycle = 1 # higher number has less graph construction overhead. for e in range(current_epoch + 1, config.num_epochs + 1, epochs_per_cycle): if FLAGS.run_epoch_in_child_process: p = multiprocessing.Process(target=run_train_and_eval, args=(e, )) p.start() p.join() if p.exitcode != 0: return p.exitcode else: tf.reset_default_graph() run_train_and_eval(e) else: logging.info('Invalid mode: %s', FLAGS.mode)
def main(_): # Parse and override hparams config = hparams_config.get_detection_config(FLAGS.model_name) config.override(FLAGS.hparams) if FLAGS.num_epochs: # NOTE: remove this flag after updating all docs. config.num_epochs = FLAGS.num_epochs # Parse image size in case it is in string format. config.image_size = utils.parse_image_size(config.image_size) if FLAGS.use_xla and FLAGS.strategy != 'tpu': tf.config.optimizer.set_jit(True) for gpu in tf.config.list_physical_devices('GPU'): tf.config.experimental.set_memory_growth(gpu, True) if FLAGS.debug: tf.config.experimental_run_functions_eagerly(True) tf.debugging.set_log_device_placement(True) os.environ['TF_DETERMINISTIC_OPS'] = '1' tf.random.set_seed(FLAGS.tf_random_seed) logging.set_verbosity(logging.DEBUG) if FLAGS.strategy == 'tpu': tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) tf.config.experimental_connect_to_cluster(tpu_cluster_resolver) tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver) ds_strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver) logging.info('All devices: %s', tf.config.list_logical_devices('TPU')) elif FLAGS.strategy == 'gpus': ds_strategy = tf.distribute.MirroredStrategy() logging.info('All devices: %s', tf.config.list_physical_devices('GPU')) else: if tf.config.list_physical_devices('GPU'): ds_strategy = tf.distribute.OneDeviceStrategy('device:GPU:0') else: ds_strategy = tf.distribute.OneDeviceStrategy('device:CPU:0') steps_per_epoch = FLAGS.num_examples_per_epoch // FLAGS.batch_size params = dict(profile=FLAGS.profile, model_name=FLAGS.model_name, iterations_per_loop=FLAGS.iterations_per_loop, model_dir=FLAGS.model_dir, steps_per_epoch=steps_per_epoch, strategy=FLAGS.strategy, batch_size=FLAGS.batch_size, tf_random_seed=FLAGS.tf_random_seed, num_shards=ds_strategy.num_replicas_in_sync) config.override(params, True) # set mixed precision policy by keras api. precision = utils.get_precision(config.strategy, config.mixed_precision) policy = tf.keras.mixed_precision.experimental.Policy(precision) tf.keras.mixed_precision.experimental.set_policy(policy) def get_dataset(is_training, config): file_pattern = (FLAGS.training_file_pattern if is_training else FLAGS.validation_file_pattern) if not file_pattern: raise ValueError('No matching files.') return dataloader.InputReader( file_pattern, is_training=is_training, use_fake_data=FLAGS.use_fake_data, max_instances_per_image=config.max_instances_per_image, debug=FLAGS.debug)(config.as_dict()) with ds_strategy.scope(): if config.model_optimizations: tfmot.set_config(config.model_optimizations.as_dict()) model = setup_model(config) if FLAGS.pretrained_ckpt: ckpt_path = tf.train.latest_checkpoint(FLAGS.pretrained_ckpt) util_keras.restore_ckpt(model, ckpt_path, config.moving_average_decay) init_experimental(config) model.fit(get_dataset(True, config), epochs=config.num_epochs, steps_per_epoch=steps_per_epoch, callbacks=train_lib.get_callbacks(config.as_dict()), validation_data=get_dataset(False, config).repeat(), validation_steps=(FLAGS.eval_samples // FLAGS.batch_size)) model.save_weights(os.path.join(FLAGS.model_dir, 'ckpt-final'))
def main(_): tf.config.run_functions_eagerly(FLAGS.debug) tf.config.optimizer.set_jit(FLAGS.use_xla) devices = tf.config.list_physical_devices('GPU') for device in devices: tf.config.experimental.set_memory_growth(device, True) model_config = hparams_config.get_detection_config(FLAGS.model_name) model_config.override(FLAGS.hparams) # Add custom overrides model_config.is_training_bn = False if FLAGS.image_size != -1: model_config.image_size = FLAGS.image_size model_config.image_size = utils.parse_image_size(model_config.image_size) model_params = model_config.as_dict() if FLAGS.mode == 'export': driver = infer_lib.KerasDriver(FLAGS.model_dir, FLAGS.debug, FLAGS.model_name, FLAGS.batch_size or None, FLAGS.only_network, model_params) if not FLAGS.saved_model_dir: raise ValueError('Please specify --saved_model_dir=') model_dir = FLAGS.saved_model_dir if tf.io.gfile.exists(model_dir): tf.io.gfile.rmtree(model_dir) driver.export(model_dir, FLAGS.tensorrt, FLAGS.tflite, FLAGS.file_pattern, FLAGS.num_calibration_steps) print('Model are exported to %s' % model_dir) elif FLAGS.mode == 'infer': driver = infer_lib.ServingDriver.create(FLAGS.model_dir, FLAGS.debug, FLAGS.saved_model_dir, FLAGS.model_name, FLAGS.batch_size or None, FLAGS.only_network, model_params) image_file = tf.io.read_file(FLAGS.input_image) image_arrays = tf.io.decode_image(image_file, channels=3, expand_animations=False) image_arrays = tf.expand_dims(image_arrays, axis=0) detections_bs = driver.serve(image_arrays) boxes, scores, classes, _ = tf.nest.map_structure( np.array, detections_bs) img = driver.visualize( np.array(image_arrays)[0], boxes[0], classes[0], scores[0], min_score_thresh=model_config.nms_configs.score_thresh or 0.4, max_boxes_to_draw=model_config.nms_configs.max_output_size) output_image_path = os.path.join(FLAGS.output_image_dir, '0.jpg') Image.fromarray(img).save(output_image_path) print('writing file to %s' % output_image_path) elif FLAGS.mode == 'benchmark': driver = infer_lib.ServingDriver.create(FLAGS.model_dir, FLAGS.debug, FLAGS.saved_model_dir, FLAGS.model_name, FLAGS.batch_size or None, FLAGS.only_network, model_params) batch_size = FLAGS.batch_size or 1 if FLAGS.input_image: image_file = tf.io.read_file(FLAGS.input_image) image_arrays = tf.io.decode_image(image_file, channels=3, expand_animations=False) image_arrays = tf.image.resize_with_pad(image_arrays, *model_config.image_size) image_arrays = tf.cast(tf.expand_dims(image_arrays, 0), tf.uint8) if batch_size > 1: image_arrays = tf.tile(image_arrays, [batch_size, 1, 1, 1]) else: # use synthetic data if no image is provided. image_arrays = tf.ones((batch_size, *model_config.image_size, 3), dtype=tf.uint8) if FLAGS.only_network: image_arrays, _ = driver._preprocess(image_arrays) driver.benchmark(image_arrays, FLAGS.bm_runs, FLAGS.trace_filename) elif FLAGS.mode == 'dry': # transfer to tf2 format ckpt driver = infer_lib.KerasDriver(FLAGS.model_dir, FLAGS.debug, FLAGS.model_name, FLAGS.batch_size or None, FLAGS.only_network, model_params) if FLAGS.export_ckpt: driver.model.save_weights(FLAGS.export_ckpt) elif FLAGS.mode == 'video': import cv2 # pylint: disable=g-import-not-at-top driver = infer_lib.ServingDriver.create(FLAGS.model_dir, FLAGS.debug, FLAGS.saved_model_dir, FLAGS.model_name, FLAGS.batch_size or None, FLAGS.only_network, model_params) cap = cv2.VideoCapture(FLAGS.input_video) if not cap.isOpened(): print('Error opening input video: {}'.format(FLAGS.input_video)) out_ptr = None if FLAGS.output_video: frame_width, frame_height = int(cap.get(3)), int(cap.get(4)) out_ptr = cv2.VideoWriter( FLAGS.output_video, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), cap.get(5), (frame_width, frame_height)) while cap.isOpened(): # Capture frame-by-frame ret, frame = cap.read() if not ret: break raw_frames = np.array([frame]) detections_bs = driver.serve(raw_frames) boxes, scores, classes, _ = tf.nest.map_structure( np.array, detections_bs) new_frame = driver.visualize( raw_frames[0], boxes[0], classes[0], scores[0], min_score_thresh=model_config.nms_configs.score_thresh or 0.4, max_boxes_to_draw=model_config.nms_configs.max_output_size) if out_ptr: # write frame into output file. out_ptr.write(new_frame) else: # show the frame online, mainly used for real-time speed test. cv2.imshow('Frame', new_frame) # Press Q on keyboard to exit if cv2.waitKey(1) & 0xFF == ord('q'): break
def clip_boxes(boxes: T, image_size: int) -> T: """Clip boxes to fit the image size.""" image_size = utils.parse_image_size(image_size) * 2 return tf.clip_by_value(boxes, [0], image_size)