Example #1
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        if cfg.TRAINER == 'replicated':
            # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
            buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]

            # Use two predictor threads per GPU for better throughput (one per GPU on the buggy TF versions)
            self.num_predictor = num_gpu if buggy_tf else num_gpu * 2
            self.predictors = [
                self._build_predictor(k % num_gpu)
                for k in range(self.num_predictor)
            ]
            self.dataflows = [
                get_eval_dataflow(self._eval_dataset,
                                  shard=k,
                                  num_shards=self.num_predictor)
                for k in range(self.num_predictor)
            ]
        else:
            # Eval on all ranks and use gather
            self.predictor = self._build_predictor(0)

            if self.batched:
                self.dataflow = get_batched_eval_dataflow(
                    self._eval_dataset,
                    shard=hvd.rank(),
                    num_shards=hvd.size(),
                    batch_size=self.batch_size)
            else:
                self.dataflow = get_eval_dataflow(self._eval_dataset,
                                                  shard=hvd.rank(),
                                                  num_shards=hvd.size())
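The shard/num_shards arguments above assume that get_eval_dataflow hands each caller a disjoint slice of the evaluation set. A minimal sketch of that contract, using a hypothetical load_inference_roidbs loader in place of the real dataset code:

def make_sharded_eval_list(dataset_name, shard=0, num_shards=1):
    # Hypothetical loader standing in for the real dataset access.
    roidbs = load_inference_roidbs(dataset_name)  # list of image records
    per_shard = len(roidbs) // num_shards
    start = shard * per_shard
    # The last shard absorbs the remainder so every image is evaluated exactly once.
    end = len(roidbs) if shard + 1 == num_shards else start + per_shard
    return roidbs[start:end]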
Example #2
def do_evaluate(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()

    # VAL and TEST datasets are evaluated identically, so loop over both.
    for dataset in list(cfg.DATA.VAL) + list(cfg.DATA.TEST):
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
            for k in range(num_gpu)
        ]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DetectionDataset().eval_or_save_inference_results(
            all_results, dataset, output)
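multithread_predict_dataflow is assumed to run each (dataflow, predictor) pair in its own thread and concatenate the per-shard results; this works because the session run inside each predictor releases the GIL. A rough sketch, with predict_dataflow as a stand-in for the single-shard worker:

import itertools
from concurrent.futures import ThreadPoolExecutor

def multithread_predict_dataflow_sketch(dataflows, graph_funcs):
    assert len(dataflows) == len(graph_funcs)
    with ThreadPoolExecutor(max_workers=len(dataflows)) as executor:
        # One thread per shard; predict_dataflow is a hypothetical worker that
        # runs one predictor over one dataflow and returns a list of results.
        futures = [executor.submit(predict_dataflow, df, fn)
                   for df, fn in zip(dataflows, graph_funcs)]
        return list(itertools.chain.from_iterable(f.result() for f in futures))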
Example #3
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        if cfg.TRAINER == 'replicated':
            # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
            buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]

            # Use two predictor threads per GPU for better throughput (one per GPU on the buggy TF versions)
            self.num_predictor = num_gpu if buggy_tf else num_gpu * 2
            self.predictors = [
                self._build_predictor(k % num_gpu)
                for k in range(self.num_predictor)
            ]
            self.dataflows = [
                get_eval_dataflow(self._eval_dataset,
                                  shard=k,
                                  num_shards=self.num_predictor)
                for k in range(self.num_predictor)
            ]
        else:
            # Only eval on the first machine,
            # Because evaluation assumes that all horovod workers share the filesystem.
            # Alternatively, can eval on all ranks and use allgather, but allgather sometimes hangs
            self._horovod_run_eval = hvd.rank() == hvd.local_rank()
            if self._horovod_run_eval:
                self.predictor = self._build_predictor(0)
                self.dataflow = get_eval_dataflow(self._eval_dataset,
                                                  shard=hvd.local_rank(),
                                                  num_shards=hvd.local_size())

            self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
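The hvd.allreduce over a dummy one-element tensor exists only to act as a barrier: every rank must participate in an allreduce, so evaluating it synchronizes the workers that skipped evaluation with the ones that ran it. A sketch of how the trigger side might consume it, with _trigger_sketch and eval_results as hypothetical stand-ins for the real callback code:

def _trigger_sketch(self):
    if self._horovod_run_eval:
        # Only local ranks of the first machine actually evaluate.
        results = eval_results(self.predictor, self.dataflow)  # stand-in
    # Every rank, evaluating or not, must run the allreduce, otherwise the
    # collective would deadlock; this is what makes it a barrier.
    self.trainer.sess.run(self.barrier)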
Example #4
    def set_up_graph(self, trainer: tp.Trainer) -> None:
        self.trainer = trainer
        if self.trainer_type == "replicated":
            # Use multiple predictor threads per GPU to get better throughput.
            self.num_predictor = self.num_gpus * 2
            self.predictors = [
                self._build_predictor(k % self.num_gpus) for k in range(self.num_predictor)
            ]
            self.dataflows = [
                get_eval_dataflow(  # type: ignore
                    self._eval_dataset,
                    self.is_aws,
                    self.is_gcs,
                    shard=k,
                    num_shards=self.num_predictor,
                )
                for k in range(self.num_predictor)
            ]
        else:
            if self.machine_rank == 0:
                # Run validation on one machine.
                self.predictor = self._build_predictor(0)
                self.dataflow = get_eval_dataflow(
                    self._eval_dataset,
                    self.is_aws,
                    self.is_gcs,
                    shard=hvd.local_rank(),
                    num_shards=hvd.local_size(),
                )

            # All workers must take part in this barrier, even if they
            # are not performing validation.
            self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
Example #5
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        if cfg.TRAINER == 'replicated':
            # Use two predictor threads per GPU to get better throughput
            self.num_predictor = num_gpu * 2
            self.predictors = [
                self._build_coco_predictor(k % num_gpu)
                for k in range(self.num_predictor)
            ]
            self.dataflows = [
                get_eval_dataflow(shard=k, num_shards=self.num_predictor)
                for k in range(self.num_predictor)
            ]
        else:
            self.predictor = self._build_coco_predictor(0)
            self.dataflow = get_eval_dataflow(shard=hvd.rank(),
                                              num_shards=hvd.size())

            # use uint8 to aggregate strings
            self.local_result_tensor = tf.placeholder(
                tf.uint8, shape=[None], name='local_result_string')
            self.concat_results = hvd.allgather(self.local_result_tensor,
                                                name='concat_results')
            local_size = tf.expand_dims(tf.size(self.local_result_tensor), 0)
            self.string_lens = hvd.allgather(local_size, name='concat_sizes')
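Because hvd.allgather concatenates along axis 0, variable-length uint8 tensors can carry serialized result strings: each worker encodes its JSON results as bytes, the per-worker sizes are gathered alongside, and every rank can split the concatenation back apart. A sketch of the encode/decode this assumes (encode_results and decode_gathered are hypothetical helpers):

import json
import numpy as np

def encode_results(results):
    # Serialize one worker's detections to a flat uint8 array.
    return np.frombuffer(json.dumps(results).encode('utf-8'), dtype=np.uint8)

def decode_gathered(concat, sizes):
    # Split the allgather'ed byte stream using the gathered per-worker sizes.
    out, offset = [], 0
    for n in sizes:
        chunk = concat[offset:offset + n].tobytes().decode('utf-8')
        out.extend(json.loads(chunk))
        offset += n
    return out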
Example #6
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        # Use two predictor threads per GPU for better throughput (a single predictor under horovod)
        self.num_predictor = 1 if cfg.TRAINER == 'horovod' else num_gpu * 2
        self.predictors = [self._build_coco_predictor(k % num_gpu) for k in range(self.num_predictor)]
        self.dataflows = [get_eval_dataflow(shard=k, num_shards=self.num_predictor)
                          for k in range(self.num_predictor)]
Example #7
def offline_evaluate(pred_func, output_file):
    df = get_eval_dataflow()
    all_results = eval_coco(
        df, lambda img: detect_one_image(img, pred_func))
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_evaluation_scores(output_file)
Example #8
def offline_evaluate(pred_func, output_file):
    df = get_eval_dataflow()
    all_results = eval_on_dataflow(
        df, lambda img: detect_one_image(img, pred_func))
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_evaluation_scores(output_file)
Example #9
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        if cfg.TRAINER == 'replicated':
            # Use two predictor threads per GPU to get better throughput
            self.num_predictor = num_gpu * 2
            self.predictors = [self._build_coco_predictor(k % num_gpu) for k in range(self.num_predictor)]
            self.dataflows = [get_eval_dataflow(shard=k, num_shards=self.num_predictor)
                              for k in range(self.num_predictor)]
        else:
            # Only eval on the first machine.
            # Alternatively, can eval on all ranks and use allgather, but allgather sometimes hangs
            self._horovod_run_eval = hvd.rank() == hvd.local_rank()
            if self._horovod_run_eval:
                self.predictor = self._build_coco_predictor(0)
                self.dataflow = get_eval_dataflow(shard=hvd.local_rank(), num_shards=hvd.local_size())

            self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
Example #10
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(
            ['image'], ['fastrcnn_fg_probs', 'fastrcnn_fg_boxes'])
        self.df = PrefetchDataZMQ(get_eval_dataflow(), 1)

        EVAL_TIMES = 5  # eval 5 times during training
        interval = self.trainer.config.max_epoch // (EVAL_TIMES + 1)
        self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES)])
        self.epochs_to_eval.add(self.trainer.config.max_epoch)
        get_tf_nms()  # just to make sure the nms part of the graph is created
Example #11
def do_evaluate(pred_config, output_file):
    num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(num_tower))).get_predictors()

    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_tower)
            for k in range(num_tower)]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DatasetRegistry.get(dataset).eval_inference_results(all_results, output)
Example #12
def offline_evaluate(model_path, output_file):
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(model_path),
                      input_names=['image'],
                      output_names=[
                          'fastrcnn_fg_probs',
                          'fastrcnn_fg_boxes',
                      ]))
    df = get_eval_dataflow()
    df = PrefetchDataZMQ(df, 1)
    all_results = eval_on_dataflow(df, lambda img: detect_one_image(img, pred))
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_evaluation_scores(output_file)
Example #13
def do_evaluate(pred_config, output_file, batch_size):
    '''
    Multi-GPU evaluation, if available.
    '''
    num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_tower))).get_predictors()
    dataflows = [
        get_eval_dataflow(batch_size, shard=k, num_shards=num_tower)
        for k in range(num_tower)
    ]
    all_results = multithread_pred_dataflow(dataflows, graph_funcs)
    # df = get_eval_dataflow()
    # all_results = pred_dataflow(df, lambda img: detect_batch(img, pred_func))
    logger.info('Dumping evaluation results')
    np.savez(output_file, **all_results)
    return print_evaluation_scores(output_file)
Example #14
def offline_evaluate(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()
    predictors = []
    dataflows = []
    for k in range(num_gpu):
        predictors.append(
            lambda img, pred=graph_funcs[k]: detect_one_image(img, pred))
        dataflows.append(get_eval_dataflow(shard=k, num_shards=num_gpu))
    if num_gpu > 1:
        all_results = multithread_eval_coco(dataflows, predictors)
    else:
        all_results = eval_coco(dataflows[0], predictors[0])
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_coco_metrics(output_file)
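The pred=graph_funcs[k] default argument in the lambda above is deliberate: Python closures capture variables, not values, so without the default every predictor lambda would late-bind to the final value of k. A minimal demonstration of the difference:

funcs_late = [lambda: k for k in range(3)]
funcs_bound = [lambda k=k: k for k in range(3)]  # default freezes the current k
print([f() for f in funcs_late])   # [2, 2, 2]
print([f() for f in funcs_bound])  # [0, 1, 2]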
Example #15
def evaluate_rcnn(model_name, paper_arxiv_id, cfg_list, model_file):
    evaluator = COCOEvaluator(
        root=COCO_ROOT, model_name=model_name, paper_arxiv_id=paper_arxiv_id
    )
    category_id_to_coco_id = {
        v: k for k, v in COCODetection.COCO_id_to_category_id.items()
    }

    cfg.update_config_from_args(cfg_list)  # TODO backup/restore config
    finalize_configs(False)
    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
    predcfg = PredictConfig(
        model=MODEL,
        session_init=SmartInit(model_file),
        input_names=MODEL.get_inference_tensor_names()[0],
        output_names=MODEL.get_inference_tensor_names()[1],
    )
    predictor = OfflinePredictor(predcfg)

    def xyxy_to_xywh(box):
        box[2] -= box[0]
        box[3] -= box[1]
        return box

    df = get_eval_dataflow("coco_val2017")
    df.reset_state()
    for img, img_id in tqdm.tqdm(df, total=len(df)):
        results = predict_image(img, predictor)
        res = [
            {
                "image_id": img_id,
                "category_id": category_id_to_coco_id.get(
                    int(r.class_id), int(r.class_id)
                ),
                "bbox": xyxy_to_xywh([round(float(x), 4) for x in r.box]),
                "score": round(float(r.score), 3),
            }
            for r in results
        ]
        evaluator.add(res)
        if evaluator.cache_exists:
            break

    evaluator.save()
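The xyxy_to_xywh helper exists because the model emits corner-format boxes while the COCO API expects [x, y, width, height]; note that it mutates the list in place. For example:

box = [10.0, 20.0, 30.0, 60.0]                        # x1, y1, x2, y2
assert xyxy_to_xywh(box) == [10.0, 20.0, 20.0, 40.0]  # x, y, w, h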
Example #16
def offline_evaluate(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()

    predictors = []
    for k in range(num_gpu):
        predictors.append(
            lambda img, pred=graph_funcs[k]: detect_one_image(img, pred))
    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
            for k in range(num_gpu)
        ]
        if num_gpu > 1:
            all_results = multithread_eval_coco(dataflows, predictors)
        else:
            all_results = eval_coco(dataflows[0], predictors[0])
        output = output_file + '-' + dataset
        with open(output, 'w') as f:
            json.dump(all_results, f)
        print_coco_metrics(dataset, output)
Example #17
    def _setup_graph(self):
        self.pred_1st = self.trainer.get_predictor(self._in_names, self._out_names_1st)
        self.pred_2nd = self.trainer.get_predictor(self._in_names, self._out_names_2nd)
        self.pred_3rd = self.trainer.get_predictor(self._in_names, self._out_names_3rd)
        self.df = get_eval_dataflow()
Example #18
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(
            ['image'], get_model_output_names())
        self.df = get_eval_dataflow()
Example #19
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(self._in_names, self._out_names)
        self.df = get_eval_dataflow()
Example #20
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(
            ['image'], ['final_boxes', 'final_probs', 'final_labels'])
        self.df = get_eval_dataflow()
Example #21
def offline_evaluate(pred_func, output_file):
    df = get_eval_dataflow()
Example #22
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(['image'],
                                               get_model_output_names())
        self.df = get_eval_dataflow()
Example #23
    if args.visualize:
        do_visualize(MODEL, args.load)
    else:
        predcfg = PredictConfig(
            model=MODEL,
            session_init=get_model_loader(args.load),
            input_names=MODEL.get_inference_tensor_names()[0],
            output_names=MODEL.get_inference_tensor_names()[1])

        if args.compact:
            ModelExporter(predcfg).export_compact(args.compact, optimize=False)
        elif args.serving:
            ModelExporter(predcfg).export_serving(args.serving, optimize=False)

        if args.predict:
            predictor = OfflinePredictor(predcfg)
            for image_file in args.predict:
                do_predict(predictor, image_file)
        elif args.evaluate:
            assert args.evaluate.endswith('.json'), args.evaluate
            do_evaluate(predcfg, args.evaluate)
        elif args.benchmark:
            df = get_eval_dataflow(cfg.DATA.VAL[0])
            df.reset_state()
            predictor = OfflinePredictor(predcfg)
            for img in tqdm.tqdm(df, total=len(df)):
                # This includes post-processing time, which is done on the CPU and is not optimized.
                # To exclude it, modify `predict_image`.
                predict_image(img[0], predictor)
Example #24
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(
            ['image'], ['fastrcnn_fg_probs', 'fastrcnn_fg_boxes'])
        self.df = PrefetchDataZMQ(get_eval_dataflow(), 1)
        get_tf_nms()  # just to make sure the nms part of the graph is created