def gen_dataset(self,
                model_spec,
                batch_size=None,
                is_training=True,
                use_fake_data=False):
    """Build the batched tf.data.Dataset used for training or evaluation.

    The dataset is also cached on ``self._dataset``.

    Args:
        model_spec: Specification for the model; its ``config`` supplies
            ``max_instances_per_image``, ``debug`` and the parameter dict
            handed to the input reader.
        batch_size: An integer, the returned dataset is batched by this size.
        is_training: A boolean, when True the returned dataset will be
            optionally shuffled and repeated as an endless dataset.
        use_fake_data: Use fake input.

    Returns:
        A TF dataset ready to be consumed by a Keras model.
    """
    spec_config = model_spec.config
    input_reader = det_dataloader.InputReader(
        self.tfrecord_file_patten,
        is_training=is_training,
        use_fake_data=use_fake_data,
        max_instances_per_image=spec_config.max_instances_per_image,
        debug=spec_config.debug)
    dataset = input_reader(spec_config.as_dict(), batch_size=batch_size)
    self._dataset = dataset
    return dataset
def get_dataset(is_training, config):
    """Create the input dataset for either the training or the validation split.

    Args:
        is_training: When True read ``FLAGS.train_file_pattern``, otherwise
            ``FLAGS.val_file_pattern``.
        config: Model config; provides ``max_instances_per_image`` and the
            parameter dict (``as_dict()``) passed to the input reader.

    Returns:
        Whatever ``dataloader.InputReader(...)(params)`` returns.

    Raises:
        ValueError: If the selected file pattern flag is empty or unset.
    """
    if is_training:
        file_pattern = FLAGS.train_file_pattern
    else:
        file_pattern = FLAGS.val_file_pattern

    if not file_pattern:
        raise ValueError('No matching files.')

    input_reader = dataloader.InputReader(
        file_pattern,
        is_training=is_training,
        use_fake_data=FLAGS.use_fake_data,
        max_instances_per_image=config.max_instances_per_image,
        debug=FLAGS.debug)
    return input_reader(config.as_dict())
def main(_):
    """Evaluate a TFLite model on the validation records and print the metrics.

    Builds the config from flags, runs every batch through ``LiteRunner``,
    post-processes the raw class/box outputs into detections, accumulates them
    in ``coco_metric.EvaluationMetric`` and finally prints a name -> value
    dict of the resulting metrics.

    Args:
        _: Unused positional argument.
    """
    config = hparams_config.get_efficientdet_config(FLAGS.model_name)
    config.override(FLAGS.hparams)
    config.val_json_file = FLAGS.val_json_file
    config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS
    config.drop_remainder = False  # eval all examples w/o drop.
    config.image_size = utils.parse_image_size(config['image_size'])

    # Evaluator for AP calculation.
    label_map = label_util.get_label_map(config.label_map)
    evaluator = coco_metric.EvaluationMetric(
        filename=config.val_json_file, label_map=label_map)

    # dataset
    batch_size = 1
    ds = dataloader.InputReader(
        FLAGS.val_file_pattern,
        is_training=False,
        max_instances_per_image=config.max_instances_per_image)(
            config, batch_size=batch_size)
    eval_samples = FLAGS.eval_samples
    if eval_samples:
        ds = ds.take((eval_samples + batch_size - 1) // batch_size)

    # Network
    lite_runner = LiteRunner(FLAGS.tflite_path)
    # The progress bar target falls back to 5000 samples when the flag is
    # unset.
    eval_samples = FLAGS.eval_samples or 5000
    pbar = tf.keras.utils.Progbar(
        (eval_samples + batch_size - 1) // batch_size)
    # Steps are counted from 1 so the bar reports completed batches and can
    # reach its target (a 0-based index always stops one short).
    for step, (images, labels) in enumerate(ds, start=1):
        cls_outputs, box_outputs = lite_runner.run(images)
        detections = postprocess.generate_detections(
            config, cls_outputs, box_outputs, labels['image_scales'],
            labels['source_ids'])
        detections = postprocess.transform_detections(detections)
        evaluator.update_state(labels['groundtruth_data'].numpy(),
                               detections.numpy())
        pbar.update(step)

    # compute the final eval results.
    metrics = evaluator.result()
    metric_names = evaluator.metric_names
    metric_dict = {name: metrics[i] for i, name in enumerate(metric_names)}

    if label_map:
        # Entries after the named metrics are read one per label-map id, in
        # sorted id order.
        offset = len(metric_names)
        for i, cid in enumerate(sorted(label_map.keys())):
            name = 'AP_/%s' % label_map[cid]
            metric_dict[name] = metrics[i + offset]
    print(FLAGS.model_name, metric_dict)
def __init__(self, config):
    """Initializes RecordInspect with passed config.

    Sets up the input reader, the parameter dict (config values plus
    ``batch_size`` and ``model_name`` taken from flags), the class-to-label
    map, and makes sure the sample output directory exists.

    Args:
        config: config file to initialize input_fn.
    """
    self.input_fn = dataloader.InputReader(
        FLAGS.file_pattern,
        is_training=not FLAGS.eval,
        use_fake_data=False,
        max_instances_per_image=config.max_instances_per_image)

    # Copy the config dict, then layer the flag-provided values on top.
    params = dict(config.as_dict())
    params.update(batch_size=FLAGS.samples, model_name=FLAGS.model_name)
    self.params = params
    logging.info(self.params)

    self.cls_to_label = config.label_map
    os.makedirs(FLAGS.save_samples_dir, exist_ok=True)
def main(_):
    """Run detection training and/or evaluation through tf.estimator.

    Depending on ``FLAGS.mode`` this trains (``train``), evaluates every new
    checkpoint that appears in ``FLAGS.model_dir`` (``eval``), or alternates
    one epoch of training with one evaluation pass (``train_and_eval``). Any
    other mode is only logged.

    Args:
        _: Unused positional argument.

    Returns:
        The non-zero exit code of a failed child process when
        ``FLAGS.run_epoch_in_child_process`` is set in ``train_and_eval``
        mode; otherwise None.

    Raises:
        RuntimeError: If the file pattern flag required by the mode is
            missing, or if ``--num_cores_per_replica`` differs from the
            product of ``--input_partition_dims`` while spatial partitioning
            is enabled.
    """
    if FLAGS.strategy == 'tpu':
        tf.disable_eager_execution()
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        tpu_grpc_url = tpu_cluster_resolver.get_master()
        tf.Session.reset(tpu_grpc_url)
    else:
        tpu_cluster_resolver = None

    # Check data path
    if FLAGS.mode in ('train', 'train_and_eval'):
        if FLAGS.train_file_pattern is None:
            raise RuntimeError('Must specify --train_file_pattern for train.')
    if FLAGS.mode in ('eval', 'train_and_eval'):
        if FLAGS.val_file_pattern is None:
            raise RuntimeError('Must specify --val_file_pattern for eval.')

    # Parse and override hparams
    config = hparams_config.get_detection_config(FLAGS.model_name)
    config.override(FLAGS.hparams)
    if FLAGS.num_epochs:  # NOTE: remove this flag after updating all docs.
        config.num_epochs = FLAGS.num_epochs

    # Parse image size in case it is in string format.
    config.image_size = utils.parse_image_size(config.image_size)

    # The following is for spatial partitioning. `features` has one tensor while
    # `labels` had 4 + (`max_level` - `min_level` + 1) * 2 tensors. The input
    # partition is performed on `features` and all partitionable tensors of
    # `labels`, see the partition logic below.
    # In the TPUEstimator context, the meaning of `shard` and `replica` is the
    # same; following the API, here has mixed use of both.
    if FLAGS.use_spatial_partition:
        # Checks input_partition_dims agrees with num_cores_per_replica.
        if FLAGS.num_cores_per_replica != np.prod(FLAGS.input_partition_dims):
            raise RuntimeError(
                '--num_cores_per_replica must be a product of array '
                'elements in --input_partition_dims.')

        labels_partition_dims = {
            'mean_num_positives': None,
            'source_ids': None,
            'groundtruth_data': None,
            'image_scales': None,
            'image_masks': None,
        }

        def _can_partition(spatial_dim):
            """Return True if spatial_dim is divisible by every partition dim."""
            partitionable_index = np.where(
                spatial_dim % np.array(FLAGS.input_partition_dims) == 0)
            return len(partitionable_index[0]) == len(
                FLAGS.input_partition_dims)

        # The Input Partition Logic: We partition only the partition-able
        # tensors.
        feat_sizes = utils.get_feat_sizes(config.get('image_size'),
                                          config.get('max_level'))
        for level in range(config.get('min_level'),
                           config.get('max_level') + 1):
            spatial_dim = feat_sizes[level]
            if (_can_partition(spatial_dim['height']) and
                    _can_partition(spatial_dim['width'])):
                level_dims = FLAGS.input_partition_dims
            else:
                level_dims = None
            labels_partition_dims['box_targets_%d' % level] = level_dims
            labels_partition_dims['cls_targets_%d' % level] = level_dims

        num_cores_per_replica = FLAGS.num_cores_per_replica
        input_partition_dims = [
            FLAGS.input_partition_dims, labels_partition_dims
        ]
        num_shards = FLAGS.num_cores // num_cores_per_replica
    else:
        num_cores_per_replica = None
        input_partition_dims = None
        num_shards = FLAGS.num_cores

    params = dict(config.as_dict(),
                  model_name=FLAGS.model_name,
                  iterations_per_loop=FLAGS.iterations_per_loop,
                  model_dir=FLAGS.model_dir,
                  num_shards=num_shards,
                  num_examples_per_epoch=FLAGS.num_examples_per_epoch,
                  strategy=FLAGS.strategy,
                  backbone_ckpt=FLAGS.backbone_ckpt,
                  ckpt=FLAGS.ckpt,
                  val_json_file=FLAGS.val_json_file,
                  testdev_dir=FLAGS.testdev_dir,
                  profile=FLAGS.profile,
                  mode=FLAGS.mode)
    config_proto = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    if FLAGS.strategy != 'tpu':
        if FLAGS.use_xla:
            config_proto.graph_options.optimizer_options.global_jit_level = (
                tf.OptimizerOptions.ON_1)
            # NOTE(review): allow_growth is only enabled together with XLA
            # here -- confirm that this nesting is intended.
            config_proto.gpu_options.allow_growth = True

    model_dir = FLAGS.model_dir
    model_fn_instance = det_model_fn.get_model_fn(FLAGS.model_name)
    max_instances_per_image = config.max_instances_per_image
    if FLAGS.eval_samples:
        # Ceiling division so a trailing partial batch is still evaluated.
        eval_steps = int((FLAGS.eval_samples + FLAGS.eval_batch_size - 1) //
                         FLAGS.eval_batch_size)
    else:
        eval_steps = None
    total_examples = int(config.num_epochs * FLAGS.num_examples_per_epoch)
    train_steps = total_examples // FLAGS.train_batch_size
    logging.info(params)

    if not tf.io.gfile.exists(model_dir):
        tf.io.gfile.makedirs(model_dir)

    config_file = os.path.join(model_dir, 'config.yaml')
    if not tf.io.gfile.exists(config_file):
        # Context manager guarantees the file is flushed and closed.
        with tf.io.gfile.GFile(config_file, 'w') as f:
            f.write(str(config))

    train_input_fn = dataloader.InputReader(
        FLAGS.train_file_pattern,
        is_training=True,
        use_fake_data=FLAGS.use_fake_data,
        max_instances_per_image=max_instances_per_image)
    eval_input_fn = dataloader.InputReader(
        FLAGS.val_file_pattern,
        is_training=False,
        use_fake_data=FLAGS.use_fake_data,
        max_instances_per_image=max_instances_per_image)

    if FLAGS.strategy == 'tpu':
        tpu_config = tf.estimator.tpu.TPUConfig(
            FLAGS.iterations_per_loop,
            num_cores_per_replica=num_cores_per_replica,
            input_partition_dims=input_partition_dims,
            per_host_input_for_training=(
                tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2))
        run_config = tf.estimator.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            model_dir=model_dir,
            log_step_count_steps=FLAGS.iterations_per_loop,
            session_config=config_proto,
            tpu_config=tpu_config,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tf_random_seed=FLAGS.tf_random_seed,
        )
        # TPUEstimator can do both train and eval.
        train_est = tf.estimator.tpu.TPUEstimator(
            model_fn=model_fn_instance,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            config=run_config,
            params=params)
        eval_est = train_est
    else:
        strategy = None
        if FLAGS.strategy == 'gpus':
            strategy = tf.distribute.MirroredStrategy()
        run_config = tf.estimator.RunConfig(
            model_dir=model_dir,
            train_distribute=strategy,
            log_step_count_steps=FLAGS.iterations_per_loop,
            session_config=config_proto,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tf_random_seed=FLAGS.tf_random_seed,
        )

        def get_estimator(global_batch_size):
            """Build an Estimator whose per-shard batch size fits the global one."""
            params['num_shards'] = getattr(strategy, 'num_replicas_in_sync', 1)
            params['batch_size'] = global_batch_size // params['num_shards']
            return tf.estimator.Estimator(model_fn=model_fn_instance,
                                          config=run_config,
                                          params=params)

        # train and eval need different estimator due to different batch size.
        train_est = get_estimator(FLAGS.train_batch_size)
        eval_est = get_estimator(FLAGS.eval_batch_size)

    # start train/eval flow.
    if FLAGS.mode == 'train':
        train_est.train(input_fn=train_input_fn, max_steps=train_steps)
        if FLAGS.eval_after_train:
            eval_est.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    elif FLAGS.mode == 'eval':
        # Run evaluation when there's a new checkpoint
        for ckpt in tf.train.checkpoints_iterator(
                FLAGS.model_dir,
                min_interval_secs=FLAGS.min_eval_interval,
                timeout=FLAGS.eval_timeout):

            logging.info('Starting to evaluate.')
            try:
                eval_results = eval_est.evaluate(eval_input_fn,
                                                 steps=eval_steps)
                # Terminate eval job when final checkpoint is reached.
                try:
                    current_step = int(os.path.basename(ckpt).split('-')[1])
                except (IndexError, ValueError):
                    # No '-<step>' suffix, or the suffix is not an integer.
                    logging.info('%s has no global step info: stop!', ckpt)
                    break

                utils.archive_ckpt(eval_results, eval_results['AP'], ckpt)
                if current_step >= train_steps:
                    logging.info('Eval finished step %d/%d', current_step,
                                 train_steps)
                    break

            except tf.errors.NotFoundError:
                # The checkpoint might already have been deleted by the time
                # eval finished. We simply skip such cases.
                logging.info('Checkpoint %s no longer exists, skipping.', ckpt)

    elif FLAGS.mode == 'train_and_eval':
        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
        try:
            step = int(os.path.basename(ckpt).split('-')[1])
            current_epoch = (step * FLAGS.train_batch_size //
                             FLAGS.num_examples_per_epoch)
            logging.info('found ckpt at step %d (epoch %d)', step,
                         current_epoch)
        except (IndexError, TypeError, ValueError):
            # TypeError: no checkpoint yet (ckpt is None); IndexError or
            # ValueError: checkpoint name carries no integer step suffix.
            logging.info('Folder %s has no ckpt with valid step.',
                         FLAGS.model_dir)
            current_epoch = 0

        def run_train_and_eval(e):
            """Train up to the end of epoch `e`, then evaluate and archive."""
            print('\n   =====> Starting training, epoch: %d.' % e)
            train_est.train(input_fn=train_input_fn,
                            max_steps=e * FLAGS.num_examples_per_epoch //
                            FLAGS.train_batch_size)
            print('\n   =====> Starting evaluation, epoch: %d.' % e)
            eval_results = eval_est.evaluate(input_fn=eval_input_fn,
                                             steps=eval_steps)
            ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
            utils.archive_ckpt(eval_results, eval_results['AP'], ckpt)

        epochs_per_cycle = 1  # higher number has less graph construction overhead.
        for e in range(current_epoch + 1, config.num_epochs + 1,
                       epochs_per_cycle):
            if FLAGS.run_epoch_in_child_process:
                p = multiprocessing.Process(target=run_train_and_eval,
                                            args=(e,))
                p.start()
                p.join()
                if p.exitcode != 0:
                    return p.exitcode
            else:
                tf.compat.v1.reset_default_graph()
                run_train_and_eval(e)

    else:
        logging.info('Invalid mode: %s', FLAGS.mode)
def export(self,
           output_dir: Optional[Text] = None,
           tensorrt: Optional[Text] = None,
           tflite: Optional[Text] = None,
           file_pattern: Optional[Text] = None,
           num_calibration_steps: int = 2000):
    """Export a saved model, frozen graph, and potential tflite/tensorrt model.

    Args:
        output_dir: the output folder for saved model. The TFLite and
            TensorRT steps read the saved model from, and write their
            results into, this folder.
        tensorrt: If not None, must be {'FP32', 'FP16', 'INT8'}.
        tflite: Type for post-training quantization.
        file_pattern: Glob for tfrecords, e.g. coco/val-*.tfrecord. Only
            used for INT8 calibration; when empty a small all-ones dataset
            is used instead.
        num_calibration_steps: Number of post-training quantization
            calibration steps to run.

    Raises:
        ValueError: If `tflite` is set but is not FP32, FP16 or INT8.
    """
    export_model, input_spec = self._get_model_and_spec(tflite)
    image_size = utils.parse_image_size(self.params['image_size'])
    if output_dir:
        tf.saved_model.save(
            export_model,
            output_dir,
            signatures=export_model.__call__.get_concrete_function(
                input_spec))
        logging.info('Model saved at %s', output_dir)

        # also save freeze pb file.
        graphdef = self.freeze(
            export_model.__call__.get_concrete_function(input_spec))
        proto_path = tf.io.write_graph(graphdef,
                                       output_dir,
                                       self.model_name + '_frozen.pb',
                                       as_text=False)
        logging.info('Frozen graph saved at %s', proto_path)

    if tflite:
        shape = (self.batch_size, *image_size, 3)
        input_spec = tf.TensorSpec(shape=shape,
                                   dtype=input_spec.dtype,
                                   name=input_spec.name)
        # from_saved_model supports advanced converter features like op fusing.
        converter = tf.lite.TFLiteConverter.from_saved_model(output_dir)
        if tflite == 'FP32':
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
            converter.target_spec.supported_types = [tf.float32]
        elif tflite == 'FP16':
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
            converter.target_spec.supported_types = [tf.float16]
        elif tflite == 'INT8':
            # Enables MLIR-based post-training quantization.
            converter.experimental_new_quantizer = True
            if file_pattern:
                config = hparams_config.get_efficientdet_config(
                    self.model_name)
                config.override(self.params)
                ds = dataloader.InputReader(
                    file_pattern,
                    is_training=False,
                    max_instances_per_image=config.max_instances_per_image)(
                        config, batch_size=self.batch_size)

                def representative_dataset_gen():
                    for image, _ in ds.take(num_calibration_steps):
                        yield [image]
            else:  # Used for debugging, can remove later.
                logging.warning(
                    'Use real representative dataset instead of fake ones.')
                num_calibration_steps = 10

                def representative_dataset_gen():  # rewrite this for real data.
                    for _ in range(num_calibration_steps):
                        yield [tf.ones(shape, dtype=input_spec.dtype)]

            converter.representative_dataset = representative_dataset_gen
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
            converter.inference_input_type = tf.uint8
            # TFLite's custom NMS op isn't supported by post-training quant,
            # so we add TFLITE_BUILTINS as well.
            supported_ops = [
                tf.lite.OpsSet.TFLITE_BUILTINS_INT8,
                tf.lite.OpsSet.TFLITE_BUILTINS
            ]
            converter.target_spec.supported_ops = supported_ops
        else:
            raise ValueError(
                f'Invalid tflite {tflite}: must be FP32, FP16, INT8.')

        tflite_path = os.path.join(output_dir, tflite.lower() + '.tflite')
        tflite_model = converter.convert()
        # Context manager makes sure the model bytes are flushed and the
        # handle is closed before the path is reported.
        with tf.io.gfile.GFile(tflite_path, 'wb') as f:
            f.write(tflite_model)
        logging.info('TFLite is saved at %s', tflite_path)

    if tensorrt:
        trt_path = os.path.join(output_dir, 'tensorrt_' + tensorrt.lower())
        conversion_params = tf.experimental.tensorrt.ConversionParams(
            max_workspace_size_bytes=(2 << 20),
            maximum_cached_engines=1,
            precision_mode=tensorrt.upper())
        converter = tf.experimental.tensorrt.Converter(
            output_dir, conversion_params=conversion_params)
        converter.convert()
        converter.save(trt_path)
        logging.info('TensorRT model is saved at %s', trt_path)
def main(_):
    """Evaluate a TFLite model on the validation records and print the metrics.

    When ``FLAGS.only_network`` is false the runner's four outputs are treated
    as NMS results (boxes, classes, scores, plus one ignored value) and are
    rescaled here; otherwise the runner returns raw class/box outputs and the
    detections are generated by ``postprocess.generate_detections``. All
    detections are accumulated in ``coco_metric.EvaluationMetric`` and the
    resulting name -> value dict is printed.

    Args:
        _: Unused positional argument.
    """
    config = hparams_config.get_efficientdet_config(FLAGS.model_name)
    config.override(FLAGS.hparams)
    config.val_json_file = FLAGS.val_json_file
    config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS
    config.drop_remainder = False  # eval all examples w/o drop.
    config.image_size = utils.parse_image_size(config['image_size'])

    # Evaluator for AP calculation.
    label_map = label_util.get_label_map(config.label_map)
    evaluator = coco_metric.EvaluationMetric(
        filename=config.val_json_file, label_map=label_map)

    # dataset
    batch_size = 1
    ds = dataloader.InputReader(
        FLAGS.val_file_pattern,
        is_training=False,
        max_instances_per_image=config.max_instances_per_image)(
            config, batch_size=batch_size)
    eval_samples = FLAGS.eval_samples
    if eval_samples:
        ds = ds.take((eval_samples + batch_size - 1) // batch_size)

    # Network
    lite_runner = LiteRunner(FLAGS.tflite_path, FLAGS.only_network)
    # The progress bar target falls back to 5000 samples when the flag is
    # unset.
    eval_samples = FLAGS.eval_samples or 5000
    pbar = tf.keras.utils.Progbar(
        (eval_samples + batch_size - 1) // batch_size)

    # Loop-invariant factor the NMS boxes are multiplied by.
    height, width = utils.parse_image_size(config.image_size)
    normalize_factor = tf.constant([height, width, height, width],
                                   dtype=tf.float32)

    # Steps are counted from 1 so the bar reports completed batches and can
    # reach its target (a 0-based index always stops one short).
    for step, (images, labels) in enumerate(ds, start=1):
        if not FLAGS.only_network:
            nms_boxes_bs, nms_classes_bs, nms_scores_bs, _ = lite_runner.run(
                images)
            nms_classes_bs += postprocess.CLASS_OFFSET

            nms_boxes_bs *= normalize_factor
            if labels['image_scales'] is not None:
                scales = tf.expand_dims(
                    tf.expand_dims(labels['image_scales'], -1), -1)
                nms_boxes_bs = nms_boxes_bs * tf.cast(scales,
                                                      nms_boxes_bs.dtype)
            detections = postprocess.generate_detections_from_nms_output(
                nms_boxes_bs, nms_classes_bs, nms_scores_bs,
                labels['source_ids'])
        else:
            cls_outputs, box_outputs = lite_runner.run(images)
            detections = postprocess.generate_detections(
                config,
                cls_outputs,
                box_outputs,
                labels['image_scales'],
                labels['source_ids'],
                pre_class_nms=FLAGS.pre_class_nms)

        detections = postprocess.transform_detections(detections)
        evaluator.update_state(labels['groundtruth_data'].numpy(),
                               detections.numpy())
        pbar.update(step)

    # compute the final eval results.
    metrics = evaluator.result()
    metric_names = evaluator.metric_names
    metric_dict = {name: metrics[i] for i, name in enumerate(metric_names)}

    if label_map:
        # Entries after the named metrics are read one per label-map id, in
        # sorted id order.
        offset = len(metric_names)
        for i, cid in enumerate(sorted(label_map.keys())):
            name = 'AP_/%s' % label_map[cid]
            metric_dict[name] = metrics[i + offset]
    print(FLAGS.model_name, metric_dict)
def main(_):
    """Evaluate the latest Keras checkpoint under a distribution strategy.

    Picks a ``tf.distribute`` strategy from ``config.strategy`` (TPU, mirrored
    GPUs, or a single GPU/CPU device), restores the latest checkpoint from
    ``FLAGS.model_dir``, runs the model over the validation records and prints
    a name -> value dict of the metrics from ``coco_metric.EvaluationMetric``.

    Args:
        _: Unused positional argument.
    """
    config = hparams_config.get_efficientdet_config(FLAGS.model_name)
    config.override(FLAGS.hparams)
    config.val_json_file = FLAGS.val_json_file
    config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS
    config.drop_remainder = False  # eval all examples w/o drop.
    config.image_size = utils.parse_image_size(config['image_size'])

    if config.strategy == 'tpu':
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        tf.config.experimental_connect_to_cluster(tpu_cluster_resolver)
        tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver)
        ds_strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver)
        logging.info('All devices: %s', tf.config.list_logical_devices('TPU'))
    elif config.strategy == 'gpus':
        ds_strategy = tf.distribute.MirroredStrategy()
        logging.info('All devices: %s', tf.config.list_physical_devices('GPU'))
    else:
        if tf.config.list_physical_devices('GPU'):
            ds_strategy = tf.distribute.OneDeviceStrategy('device:GPU:0')
        else:
            ds_strategy = tf.distribute.OneDeviceStrategy('device:CPU:0')

    with ds_strategy.scope():
        # Network
        model = efficientdet_keras.EfficientDetNet(config=config)
        model.build((None, *config.image_size, 3))
        util_keras.restore_ckpt(model,
                                tf.train.latest_checkpoint(FLAGS.model_dir),
                                config.moving_average_decay,
                                skip_mismatch=False)

        # Evaluator for AP calculation. Created before `model_fn` so the
        # closure below refers to a name that is already bound.
        label_map = label_util.get_label_map(config.label_map)
        evaluator = coco_metric.EvaluationMetric(
            filename=config.val_json_file, label_map=label_map)

        @tf.function
        def model_fn(images, labels):
            """Run the model on one batch and feed the evaluator."""
            cls_outputs, box_outputs = model(images, training=False)
            detections = postprocess.generate_detections(
                config, cls_outputs, box_outputs, labels['image_scales'],
                labels['source_ids'])
            # The evaluator update is plain Python, so it is wrapped in
            # numpy_function to be callable from inside the tf.function.
            tf.numpy_function(evaluator.update_state, [
                labels['groundtruth_data'],
                postprocess.transform_detections(detections)
            ], [])

        # dataset
        batch_size = FLAGS.batch_size  # global batch size.
        ds = dataloader.InputReader(
            FLAGS.val_file_pattern,
            is_training=False,
            max_instances_per_image=config.max_instances_per_image)(
                config, batch_size=batch_size)
        if FLAGS.eval_samples:
            ds = ds.take((FLAGS.eval_samples + batch_size - 1) // batch_size)
        ds = ds_strategy.experimental_distribute_dataset(ds)

        # evaluate all images. The progress bar target falls back to 5000
        # samples when the flag is unset.
        eval_samples = FLAGS.eval_samples or 5000
        pbar = tf.keras.utils.Progbar(
            (eval_samples + batch_size - 1) // batch_size)
        # Steps are counted from 1 so the bar reports completed batches and
        # can reach its target (a 0-based index always stops one short).
        for step, (images, labels) in enumerate(ds, start=1):
            ds_strategy.run(model_fn, (images, labels))
            pbar.update(step)

    # compute the final eval results.
    metrics = evaluator.result()
    metric_names = evaluator.metric_names
    metric_dict = {name: metrics[i] for i, name in enumerate(metric_names)}

    if label_map:
        # Entries after the named metrics are read one per label-map id, in
        # sorted id order.
        offset = len(metric_names)
        for i, cid in enumerate(sorted(label_map.keys())):
            name = 'AP_/%s' % label_map[cid]
            metric_dict[name] = metrics[i + offset]
    print(FLAGS.model_name, metric_dict)