Example #1
    def _eval(self):
        all_results_1st = eval_coco(self.df, lambda img: detect_one_image(img, self.pred_1st))
        all_results_2nd = eval_coco(self.df, lambda img: detect_one_image(img, self.pred_2nd))
        all_results_3rd = eval_coco(self.df, lambda img: detect_one_image(img, self.pred_3rd))
        output_file_1st = os.path.join(
            logger.get_logger_dir(), '1st_outputs{}.json'.format(self.global_step))
        output_file_2nd = os.path.join(
            logger.get_logger_dir(), '2nd_outputs{}.json'.format(self.global_step))
        output_file_3rd = os.path.join(
            logger.get_logger_dir(), '3rd_outputs{}.json'.format(self.global_step))
        with open(output_file_1st, 'w') as f:
            json.dump(all_results_1st, f)
        with open(output_file_2nd, 'w') as f:
            json.dump(all_results_2nd, f)
        with open(output_file_3rd, 'w') as f:
            json.dump(all_results_3rd, f)
        try:
            scores_1st = print_evaluation_scores(output_file_1st)
            scores_2nd = print_evaluation_scores(output_file_2nd)
            scores_3rd = print_evaluation_scores(output_file_3rd)
        except Exception:
            logger.exception("Exception in COCO evaluation.")
            # Fall back to empty dicts so the reporting loops below do not
            # raise a NameError when evaluation fails.
            scores_1st = scores_2nd = scores_3rd = {}
        for k, v in scores_1st.items():
            self.trainer.monitors.put_scalar(k, v)
        for k, v in scores_2nd.items():
            self.trainer.monitors.put_scalar(k, v)
        for k, v in scores_3rd.items():
            self.trainer.monitors.put_scalar(k, v)
Example #2
def offline_evaluate(pred_func, output_file):
    df = get_eval_dataflow()
    all_results = eval_coco(
        df, lambda img: detect_one_image(img, pred_func))
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_evaluation_scores(output_file)
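Here `pred_func` is a single-image prediction function built from a trained checkpoint. A minimal sketch of how it might be constructed with tensorpack's `OfflinePredictor`; the model class, checkpoint path, and tensor names below are placeholders, not taken from the example:

from tensorpack.predict import OfflinePredictor, PredictConfig
from tensorpack.tfutils.sessinit import get_model_loader

# Placeholder model, checkpoint path, and tensor names.
pred_config = PredictConfig(
    model=ResNetC4Model(),
    session_init=get_model_loader('/path/to/checkpoint'),
    input_names=['image'],
    output_names=['final_boxes', 'final_probs', 'final_labels'])
pred_func = OfflinePredictor(pred_config)

offline_evaluate(pred_func, 'eval_outputs.json')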
Example #3
    def _eval(self):
        logdir = args.logdir
        if cfg.TRAINER == 'replicated':
            all_results = multithread_eval_coco(self.dataflows, self.predictors)
        else:
            filenames = [os.path.join(
                logdir, 'outputs{}-part{}.json'.format(self.global_step, rank)
            ) for rank in range(hvd.local_size())]

            if self._horovod_run_eval:
                local_results = eval_coco(self.dataflow, self.predictor)
                fname = filenames[hvd.local_rank()]
                with open(fname, 'w') as f:
                    json.dump(local_results, f)
            self.barrier.eval()
            if hvd.rank() > 0:
                return
            all_results = []
            for fname in filenames:
                with open(fname, 'r') as f:
                    obj = json.load(f)
                all_results.extend(obj)
                os.unlink(fname)

        output_file = os.path.join(
            logdir, 'outputs{}.json'.format(self.global_step))
        with open(output_file, 'w') as f:
            json.dump(all_results, f)
        try:
            scores = print_coco_metrics(output_file)
            for k, v in scores.items():
                self.trainer.monitors.put_scalar(k, v)
        except Exception:
            logger.exception("Exception in COCO evaluation.")
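The `self.barrier` op evaluated above is not defined in this snippet; its job is to keep rank 0 from reading the per-rank partial files before every rank has finished writing them. One common way to build such a barrier, assuming Horovod's TensorFlow API (this op is an assumption, not taken from the example):

import tensorflow as tf
import horovod.tensorflow as hvd

# A dummy allreduce acts as a synchronization point: evaluating it blocks
# each rank until all ranks have reached the same call. It would typically
# be created once in the callback's _setup_graph and stored as self.barrier.
barrier = hvd.allreduce(tf.random_normal(shape=[1]))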
Example #4
    def _eval(self):
        all_results = eval_coco(self.df, lambda img: detect_one_image(img, self.pred))
        output_file = os.path.join(
            logger.get_logger_dir(), 'outputs{}.json'.format(self.global_step))
        with open(output_file, 'w') as f:
            json.dump(all_results, f)
        scores = print_evaluation_scores(output_file)
        for k, v in scores.items():
            self.trainer.monitors.put_scalar(k, v)
Example #5
def offline_evaluate(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()
    predictors = []
    dataflows = []
    for k in range(num_gpu):
        predictors.append(
            lambda img, pred=graph_funcs[k]: detect_one_image(img, pred))
        dataflows.append(get_eval_dataflow(shard=k, num_shards=num_gpu))
    if num_gpu > 1:
        all_results = multithread_eval_coco(dataflows, predictors)
    else:
        all_results = eval_coco(dataflows[0], predictors[0])
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_coco_metrics(output_file)
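The `pred=graph_funcs[k]` default argument binds each predictor at lambda definition time; without it, every lambda would close over the same loop variable and end up calling the last predictor. A short standalone illustration of the difference (the toy functions below are purely illustrative):

# Late binding: all three lambdas see k == 2 when they are finally called.
funcs = [lambda x: x + k for k in range(3)]
print([f(0) for f in funcs])        # [2, 2, 2]

# Binding k through a default argument captures its value per iteration.
funcs = [lambda x, k=k: x + k for k in range(3)]
print([f(0) for f in funcs])        # [0, 1, 2]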
Example #6
    def _eval(self):
        logdir = args.logdir
        if cfg.TRAINER == 'replicated':
            with ThreadPoolExecutor(max_workers=self.num_predictor, thread_name_prefix='EvalWorker') as executor, \
                    tqdm.tqdm(total=sum([df.size() for df in self.dataflows])) as pbar:
                futures = []
                for dataflow, pred in zip(self.dataflows, self.predictors):
                    futures.append(
                        executor.submit(eval_coco, dataflow, pred, pbar))
                all_results = list(
                    itertools.chain(*[fut.result() for fut in futures]))
        else:
            filenames = [
                os.path.join(
                    logdir,
                    'outputs{}-part{}.json'.format(self.global_step, rank))
                for rank in range(hvd.local_size())
            ]

            if self._horovod_run_eval:
                local_results = eval_coco(self.dataflow, self.predictor)
                fname = filenames[hvd.local_rank()]
                with open(fname, 'w') as f:
                    json.dump(local_results, f)
            self.barrier.eval()
            if hvd.rank() > 0:
                return
            all_results = []
            for fname in filenames:
                with open(fname, 'r') as f:
                    obj = json.load(f)
                all_results.extend(obj)
                os.unlink(fname)

        output_file = os.path.join(logdir,
                                   'outputs{}.json'.format(self.global_step))
        with open(output_file, 'w') as f:
            json.dump(all_results, f)
        try:
            scores = print_evaluation_scores(output_file)
            for k, v in scores.items():
                self.trainer.monitors.put_scalar(k, v)
        except Exception:
            logger.exception("Exception in COCO evaluation.")
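The replicated branch fans evaluation out to one worker thread per dataflow shard and then flattens the per-shard result lists. The same fan-out/fan-in pattern in isolation, with a stand-in evaluation function (everything below is illustrative, not taken from the example):

import itertools
from concurrent.futures import ThreadPoolExecutor

def fake_eval(shard):
    # Stand-in for eval_coco on one dataflow shard.
    return [{'shard': shard, 'idx': i} for i in range(2)]

with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(fake_eval, shard) for shard in range(4)]
    all_results = list(itertools.chain(*[fut.result() for fut in futures]))

print(len(all_results))  # 8 results, 2 from each of the 4 shards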
Example #7
    def _eval(self):
        logdir = args.logdir
        if cfg.TRAINER == 'replicated':
            with ThreadPoolExecutor(max_workers=self.num_predictor, thread_name_prefix='EvalWorker') as executor, \
                    tqdm.tqdm(total=sum([df.size() for df in self.dataflows])) as pbar:
                futures = []
                for dataflow, pred in zip(self.dataflows, self.predictors):
                    futures.append(executor.submit(eval_coco, dataflow, pred, pbar))
                all_results = list(itertools.chain(*[fut.result() for fut in futures]))
        else:
            if self._horovod_run_eval:
                local_results = eval_coco(self.dataflow, self.predictor)
                output_partial = os.path.join(
                    logdir, 'outputs{}-part{}.json'.format(self.global_step, hvd.local_rank()))
                with open(output_partial, 'w') as f:
                    json.dump(local_results, f)
            self.barrier.eval()
            if hvd.rank() > 0:
                return
            all_results = []
            for k in range(hvd.local_size()):
                output_partial = os.path.join(
                    logdir, 'outputs{}-part{}.json'.format(self.global_step, k))
                with open(output_partial, 'r') as f:
                    obj = json.load(f)
                all_results.extend(obj)
                os.unlink(output_partial)

        output_file = os.path.join(
            logdir, 'outputs{}.json'.format(self.global_step))
        with open(output_file, 'w') as f:
            json.dump(all_results, f)
        try:
            scores = print_evaluation_scores(output_file)
            for k, v in scores.items():
                self.trainer.monitors.put_scalar(k, v)
        except Exception:
            logger.exception("Exception in COCO evaluation.")
Example #8
def offline_evaluate(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()

    predictors = []
    for k in range(num_gpu):
        predictors.append(
            lambda img, pred=graph_funcs[k]: detect_one_image(img, pred))
    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
            for k in range(num_gpu)
        ]
        if num_gpu > 1:
            all_results = multithread_eval_coco(dataflows, predictors)
        else:
            all_results = eval_coco(dataflows[0], predictors[0])
        output = output_file + '-' + dataset
        with open(output, 'w') as f:
            json.dump(all_results, f)
        print_coco_metrics(dataset, output)
Example #9
    def _eval(self):
        if cfg.TRAINER == 'replicated':
            with ThreadPoolExecutor(max_workers=self.num_predictor, thread_name_prefix='EvalWorker') as executor, \
                    tqdm.tqdm(total=sum([df.size() for df in self.dataflows])) as pbar:
                futures = []
                for dataflow, pred in zip(self.dataflows, self.predictors):
                    futures.append(
                        executor.submit(eval_coco, dataflow, pred, pbar))
                all_results = list(
                    itertools.chain(*[fut.result() for fut in futures]))
        else:
            local_results = eval_coco(self.dataflow, self.predictor)
            results_as_arr = np.frombuffer(dumps(local_results),
                                           dtype=np.uint8)
            sizes, concat_arrs = tf.get_default_session().run(
                [self.string_lens, self.concat_results],
                feed_dict={self.local_result_tensor: results_as_arr})
            if hvd.rank() > 0:
                return
            all_results = []
            start = 0
            for size in sizes:
                substr = concat_arrs[start:start + size]
                results = loads(substr.tobytes())
                all_results.extend(results)
                start = start + size

        output_file = os.path.join(logger.get_logger_dir(),
                                   'outputs{}.json'.format(self.global_step))
        with open(output_file, 'w') as f:
            json.dump(all_results, f)
        try:
            scores = print_evaluation_scores(output_file)
            for k, v in scores.items():
                self.trainer.monitors.put_scalar(k, v)
        except Exception:
            logger.exception("Exception in COCO evaluation.")
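In the Horovod branch above, the per-rank result list is serialized to bytes, viewed as a uint8 array so it can travel through TensorFlow gather ops, and deserialized on rank 0. The serialize/deserialize round-trip in isolation, assuming a pickle-like `dumps`/`loads` pair (tensorpack ships its own in `tensorpack.utils.serialize`; plain `pickle` is used here only for illustration):

import numpy as np
from pickle import dumps, loads

results = [{'image_id': 1, 'category_id': 3, 'score': 0.9}]

# bytes -> uint8 array, so the payload can be fed through a tensor op
as_arr = np.frombuffer(dumps(results), dtype=np.uint8)

# uint8 array -> bytes -> original Python objects
restored = loads(as_arr.tobytes())
assert restored == results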
Example #10
def main():
    data_type = 'coco'
    data_root_dir = '/data/data_coco/'
    # model_depth = 50
    epoch_max = 100
    batch_size = 8

    if data_type == 'coco':
        dataset_train = CocoDataset(data_root_dir,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(data_root_dir,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    else:
        print('Only the coco data_type is supported for now')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=True)
    loader_train = DataLoader(dataset_train,
                              num_workers=8,
                              collate_fn=collater,
                              batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=batch_size,
                                          drop_last=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=8,
                            collate_fn=collater,
                            batch_sampler=sampler_val)

    retinanet = model.retinanet_50(dataset_train.num_classes(),
                                   pretrained=True)

    retinanet = retinanet.cuda()
    optimizer = torch.optim.Adam(retinanet.parameters(), lr=1e-4)
    # optimizer = torch.optim.SGD(retinanet.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True,
                                                           factor=0.5)

    model_pretrain_dir = './model/model_final.pt'
    if os.path.exists(model_pretrain_dir):
        print('pretrain model exist!')
        retinanet = torch.load(model_pretrain_dir)

    print('train images num: {}'.format(len(loader_train) * batch_size))
    for epoch_num in range(epoch_max):
        retinanet.train()
        epoch_loss = []
        for iter_num, data in enumerate(loader_train):
            optimizer.zero_grad()
            input_tensor = [data['img'].cuda().float(), data['annot']]
            classification_loss, regression_loss = retinanet(input_tensor)
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            epoch_loss.append(float(loss))

            if loss.item() == 0:
                continue

            loss.backward()
            optimizer.step()

            print(
                'Epoch:{}/{} | Iters:{}/{} | C loss:{:.4f} | R loss:{:.4f} | Current loss:{:.4f} | Current LR:{:.7f}'
                .format(epoch_num + 1, epoch_max, iter_num + 1,
                        len(loader_train), float(classification_loss),
                        float(regression_loss), np.mean(epoch_loss),
                        optimizer.param_groups[0]['lr']))
            del classification_loss
            del regression_loss

        # Run COCO validation once per epoch
        eval.eval_coco(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet,
            './model/{}_retinanet_{}.pt'.format(data_type, epoch_num + 1))
    retinanet.eval()
    torch.save(retinanet, './model/model_final.pt')
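Saving the whole module object with `torch.save(retinanet, ...)` ties the checkpoint to the exact class definitions and file layout used at training time. A common, more portable alternative is to save only the parameters; the sketch below reuses the names from this example and is an assumption, not part of the original code:

# Save parameters only.
torch.save(retinanet.state_dict(), './model/model_final_state.pt')

# Rebuild the architecture and load the parameters back.
restored = model.retinanet_50(dataset_val.num_classes(), pretrained=False)
restored.load_state_dict(torch.load('./model/model_final_state.pt'))
restored = restored.cuda().eval()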