def _setup_graph(self):
    """Build the predictor(s) and evaluation dataflow(s) for this callback.

    When ``cfg.TRAINER`` is ``'replicated'``, sets ``self.num_predictor``,
    ``self.predictors`` and ``self.dataflows`` (one sharded dataflow per
    predictor). Otherwise the horovod path is taken: ``self._horovod_run_eval``
    is set, a single ``self.predictor`` / ``self.dataflow`` pair is built only
    when that flag is true, and ``self.barrier`` is created from an allreduce.
    """
    gpu_count = cfg.TRAIN.NUM_GPUS

    if cfg.TRAINER != 'replicated':
        # Only eval on the first machine,
        # because evaluation assumes that all horovod workers share the filesystem.
        # Alternatively, can eval on all ranks and use allgather,
        # but allgather sometimes hangs.
        self._horovod_run_eval = hvd.rank() == hvd.local_rank()
        if self._horovod_run_eval:
            self.predictor = self._build_predictor(0)
            self.dataflow = get_eval_dataflow(
                self._eval_dataset,
                shard=hvd.local_rank(),
                num_shards=hvd.local_size(),
            )

        # Created in the horovod branch regardless of `_horovod_run_eval`.
        self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
        return

    # TF bug in version 1.11, 1.12:
    # https://github.com/tensorflow/tensorflow/issues/22750
    tf_is_buggy = get_tf_version_tuple() in [(1, 11), (1, 12)]

    # Use two predictor threads per GPU to get better throughput
    # (only one per GPU on the affected TF versions).
    if tf_is_buggy:
        self.num_predictor = gpu_count
    else:
        self.num_predictor = gpu_count * 2

    # Predictors are assigned to GPUs round-robin.
    built_predictors = []
    for idx in range(self.num_predictor):
        built_predictors.append(self._build_predictor(idx % gpu_count))
    self.predictors = built_predictors

    # One dataflow shard per predictor.
    sharded_flows = []
    for idx in range(self.num_predictor):
        flow = get_eval_dataflow(
            self._eval_dataset, shard=idx, num_shards=self.num_predictor)
        sharded_flows.append(flow)
    self.dataflows = sharded_flows
def do_evaluate(pred_config, output_file):
    """Run inference on every dataset in ``cfg.DATA.VAL`` and evaluate it.

    Args:
        pred_config: prediction config handed to ``MultiTowerOfflinePredictor``.
        output_file: output name prefix; for each dataset the results are
            evaluated with ``output_file + '-' + dataset`` as the output name.
    """
    # At least one tower, even if cfg.TRAIN.NUM_GPUS is 0.
    tower_count = max(cfg.TRAIN.NUM_GPUS, 1)
    tower_ids = list(range(tower_count))
    multi_tower = MultiTowerOfflinePredictor(pred_config, tower_ids)
    graph_funcs = multi_tower.get_predictors()

    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))

        # One dataflow shard per tower.
        shards = []
        for shard_idx in range(tower_count):
            shards.append(
                get_eval_dataflow(dataset, shard=shard_idx, num_shards=tower_count))

        results = multithread_predict_dataflow(shards, graph_funcs)
        dataset_output = output_file + '-' + dataset
        registered = DatasetRegistry.get(dataset)
        registered.eval_inference_results(results, dataset_output)
# Dispatch on the command-line flags: visualization is exclusive; otherwise an
# optional export step is followed by at most one of predict/evaluate/benchmark.
if args.visualize:
    do_visualize(MODEL, args.load)
else:
    # Inference configuration shared by every mode below.
    init_from_checkpoint = SmartInit(args.load)
    inference_inputs = MODEL.get_inference_tensor_names()[0]
    inference_outputs = MODEL.get_inference_tensor_names()[1]
    predcfg = PredictConfig(
        model=MODEL,
        session_init=init_from_checkpoint,
        input_names=inference_inputs,
        output_names=inference_outputs,
    )

    # Optional model export; compact export takes precedence over serving.
    if args.compact:
        ModelExporter(predcfg).export_compact(args.compact, optimize=False)
    elif args.serving:
        ModelExporter(predcfg).export_serving(args.serving, optimize=False)

    if args.predict:
        predictor = OfflinePredictor(predcfg)
        for image_file in args.predict:
            do_predict(predictor, image_file)
    elif args.evaluate:
        assert args.evaluate.endswith('.json'), args.evaluate
        do_evaluate(predcfg, args.evaluate)
    elif args.benchmark:
        # Benchmark on the first validation dataset only.
        df = get_eval_dataflow(cfg.DATA.VAL[0])
        df.reset_state()
        predictor = OfflinePredictor(predcfg)
        progress = tqdm.tqdm(df, total=len(df), smoothing=0.5)
        for datapoint in progress:
            # This includes post-processing time, which is done on CPU and
            # not optimized. To exclude it, modify `predict_image`.
            predict_image(datapoint[0], predictor)