Example #1
def do_evaluation(cfg, model, output_dir, distributed):
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    assert isinstance(model, SSD), 'Wrong module.'
    test_datasets = build_dataset(dataset_list=cfg.DATASETS.TEST, is_test=True)
    device = torch.device(cfg.MODEL.DEVICE)
    model.eval()
    if not model.is_test:
        model.is_test = True
    predictor = Predictor(cfg=cfg,
                          model=model,
                          iou_threshold=cfg.TEST.NMS_THRESHOLD,
                          score_threshold=cfg.TEST.CONFIDENCE_THRESHOLD,
                          device=device)

    cpu_device = torch.device("cpu")
    logger = logging.getLogger("SSD.inference")
    for dataset_name, test_dataset in zip(cfg.DATASETS.TEST, test_datasets):
        logger.info("Test dataset {} size: {}".format(dataset_name,
                                                      len(test_dataset)))
        indices = list(range(len(test_dataset)))
        if distributed:
            indices = indices[distributed_util.get_rank()::distributed_util.
                              get_world_size()]

        # show progress bar only on main process.
        progress_bar = tqdm if distributed_util.is_main_process() else iter
        logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper()))
        predictions = {}
        for i in progress_bar(indices):
            image = test_dataset.get_image(i)
            output = predictor.predict(image)
            boxes, labels, scores = [o.to(cpu_device).numpy() for o in output]
            predictions[i] = (boxes, labels, scores)
        distributed_util.synchronize()
        predictions = _accumulate_predictions_from_multiple_gpus(predictions)
        if not distributed_util.is_main_process():
            # use continue (not return) so non-main processes keep reaching synchronize() for later datasets
            continue

        final_output_dir = os.path.join(output_dir, dataset_name)
        if not os.path.exists(final_output_dir):
            os.makedirs(final_output_dir)
        torch.save(predictions,
                   os.path.join(final_output_dir, 'predictions.pth'))
        evaluate(dataset=test_dataset,
                 predictions=predictions,
                 output_dir=final_output_dir)
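The evaluation loop above shards work across processes with Python's extended slicing. A minimal, self-contained sketch of that round-robin split, with hypothetical rank/world-size values standing in for distributed_util.get_rank() and get_world_size():

def shard_indices(num_images, rank, world_size):
    # each process takes every world_size-th index, starting from its own rank
    return list(range(num_images))[rank::world_size]

# e.g. 10 images over 4 processes:
# rank 0 -> [0, 4, 8], rank 1 -> [1, 5, 9], rank 2 -> [2, 6], rank 3 -> [3, 7]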
Example #2
def _evaluation(cfg, dataset_name, test_dataset, predictor, distributed, output_dir):
    """ Perform evaluation on one dataset
    Args:
        cfg: config object
        dataset_name: name of the dataset
        test_dataset: Dataset object
        predictor: Predictor object used to run prediction.
        distributed: whether to run the evaluation in distributed mode
        output_dir: path to save prediction results
    Returns:
        evaluation result
    """
    cpu_device = torch.device("cpu")
    logger = logging.getLogger("SSD.inference")
    logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(test_dataset)))
    indices = list(range(len(test_dataset)))
    if distributed:
        indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()]

    # show progress bar only on main process.
    progress_bar = tqdm if distributed_util.is_main_process() else iter
    logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper()))
    predictions = {}
    for i in progress_bar(indices):
        image = test_dataset.get_image(i)
        output = predictor.predict(image)
        boxes, labels, scores = [o.to(cpu_device).numpy() for o in output]
        predictions[i] = (boxes, labels, scores)
    distributed_util.synchronize()
    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not distributed_util.is_main_process():
        return

    final_output_dir = os.path.join(output_dir, dataset_name)
    if not os.path.exists(final_output_dir):
        os.makedirs(final_output_dir)
    torch.save(predictions, os.path.join(final_output_dir, 'predictions.pth'))
    return evaluate(dataset=test_dataset, predictions=predictions, output_dir=final_output_dir)
Example #3
def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    all_predictions = distributed_util.all_gather(predictions_per_gpu)
    if not distributed_util.is_main_process():
        return
    # merge the list of dicts
    predictions = {}
    for p in all_predictions:
        predictions.update(p)
    # the dict keys are image indices; sort them and check they form a contiguous range
    image_ids = list(sorted(predictions.keys()))
    if len(image_ids) != image_ids[-1] + 1:
        logger = logging.getLogger("SSD.inference")
        logger.warning(
            "Number of images that were gathered from multiple processes is not "
            "a contiguous set. Some images might be missing from the evaluation"
        )

    # convert to a list
    predictions = [predictions[i] for i in image_ids]
    return predictions
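As a reference for the merge step above, a minimal sketch with plain dicts in place of the per-GPU results returned by distributed_util.all_gather (the values are placeholders, not real predictions):

gathered = [{0: "pred_0", 2: "pred_2"}, {1: "pred_1", 3: "pred_3"}]
merged = {}
for per_rank in gathered:
    merged.update(per_rank)
image_ids = sorted(merged.keys())            # [0, 1, 2, 3]
assert len(image_ids) == image_ids[-1] + 1   # contiguous, so no image is missing
ordered = [merged[i] for i in image_ids]     # list indexed by image id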
Example #4
def do_train(cfg, model,
             data_loader,
             optimizer,
             scheduler,
             device,
             args,
             val_sets_dict=None):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
        tf_writer = tf.compat.v1.summary.FileWriter(cfg.OUTPUT_DIR)
    else:
        summary_writer = None


    if cfg.DATASETS.DG:
        dataloaders = data_loader
        max_iter = len(data_loader[0])
        dataiters = [iter(dataloader) for dataloader in dataloaders]
    else:
        max_iter = len(data_loader)
        data_loader = iter(data_loader)

    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()

    if args.return_best:
        best_map = 0
    
    for iteration in range(scheduler.last_epoch, max_iter):
        if cfg.DATASETS.DG:
            # domain generalization settings
            # we need to read images from different sources
            images = torch.ones(cfg.SOLVER.BATCH_SIZE * len(dataloaders), 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)

            for j in range(len(dataloaders)):
                if cfg.MODEL.SELF_SUPERVISED:
                    d_images, d_boxes, d_labels, d_j_images, d_j_index, d_orig_boxes, d_orig_labels = next(dataiters[j])
                else:
                    d_images, d_boxes, d_labels, d_orig_boxes, d_orig_labels = next(dataiters[j])

                start_bs = cfg.SOLVER.BATCH_SIZE * j
                end_bs = start_bs + cfg.SOLVER.BATCH_SIZE

                images[start_bs:end_bs, :, :, :] = d_images

                if j == 0:
                    boxes = d_boxes
                    labels = d_labels
                    orig_boxes = d_orig_boxes
                    orig_labels = d_orig_labels

                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = d_j_images
                        j_index = d_j_index
                else:
                    boxes = torch.cat((boxes, d_boxes))
                    labels = torch.cat((labels, d_labels))
                    orig_boxes = torch.cat((orig_boxes, d_orig_boxes))
                    orig_labels = torch.cat((orig_labels, d_orig_labels))

                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = torch.cat((j_images, d_j_images))
                        j_index = torch.cat((j_index, d_j_index))
        else:
            if cfg.MODEL.SELF_SUPERVISED:
                images, boxes, labels, j_images, j_index, orig_boxes, orig_labels = next(data_loader)
            else:
                images, boxes, labels, orig_boxes, orig_labels = next(data_loader)

        # incrementing iteration here is safe: the for statement reassigns it on the next pass
        iteration = iteration + 1

        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss_dict = model(images, targets=(boxes, labels))
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss = sum(loss for loss in loss_dict.values())

        # loss.backward() becomes:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        if cfg.MODEL.SELF_SUPERVISED:
            j_images = j_images.to(device)
            j_index = j_index.to(device)
            loss_dict_j = model(j_images, targets=j_index, auxiliary_task=True)
            loss_dict_reduced_j = reduce_loss_dict(loss_dict_j)
            losses_reduced_j = sum(loss for loss in loss_dict_reduced_j.values())
            loss_j = sum(loss for loss in loss_dict_j.values())
            # apply reduction factor for auxiliary loss
            loss_j = loss_j * cfg.MODEL.SELF_SUPERVISOR.WEIGHT

            # loss.backward() becomes:
            with amp.scale_loss(loss_j, optimizer) as scaled_loss:
                scaled_loss.backward()

            # append this loss to the dictionary of losses
            loss_dict.update(loss_dict_j)
            losses_reduced += losses_reduced_j

        optimizer.step()
        scheduler.step()

        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration,
                                                                          optimizer.param_groups[0]['lr'],
                                                                          time.time() - tic,
                                                                          str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)

                if cfg.MODEL.SELF_SUPERVISED:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer, j_images=j_images)
                else:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer)
                #for tag, value in model.named_parameters():
                #    tag = tag.replace('.', '/')
                #    if 'ss_classifier' in tag:
                #        print(tag, value)
                #_log_network_params(tf_writer, model, global_step)

            tic = time.time()

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR,
                                      "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            save_training_checkpoint(logger, model, scheduler, optimizer, model_path)
        # Evaluate during training to trace mAP changes and see whether performance improved.
        # If args.return_best is True, the model returned should be the one with the best performance on the val set.
        if args.eval_step > 0 and iteration % args.eval_step == 0 and (not iteration == max_iter or args.return_best):
            dataset_metrics = do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed, datasets_dict=val_sets_dict)
            
            model.train()

            if args.distributed and not distributed_util.is_main_process():
                continue
            
            avg_map = _compute_avg_map(dataset_metrics)
            
            if args.return_best:

                if avg_map > best_map:
                    best_map = avg_map
                    logger.info("With iteration {} passed the best! New best avg map: {:4f}".format(iteration, best_map))
                    model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE))
                    _save_model(logger, model, model_path)
                else:
                    logger.info("With iteration {} the best has not been reached. Best avg map: {:4f}, Current avg mAP: {:4f}".format(iteration, best_map, avg_map))
                
            # logging
            if summary_writer:
                global_step = iteration

                summary_writer.add_scalar("val_avg_map", avg_map, global_step=global_step)

                for dataset_name, metrics in dataset_metrics.items():
                    for metric_name, metric_value in metrics.get_printable_metrics().items():
                        summary_writer.add_scalar('/'.join(['val', dataset_name, metric_name]), metric_value,
                                                  global_step=global_step)

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    if args.return_best:
        model.load(os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE)))
    return model
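The domain-generalization branch above builds a single batch from several sources by pre-allocating one tensor and filling a per-source slice on each pass. A minimal sketch of that pattern with hypothetical sizes (batch size 2, two sources, 300x300 inputs) standing in for cfg.SOLVER.BATCH_SIZE, len(dataloaders) and cfg.INPUT.IMAGE_SIZE:

import torch

batch_size, num_sources, image_size = 2, 2, 300
images = torch.ones(batch_size * num_sources, 3, image_size, image_size)
for j in range(num_sources):
    # stand-in for the images returned by next(dataiters[j])
    d_images = torch.randn(batch_size, 3, image_size, image_size)
    start = batch_size * j
    images[start:start + batch_size] = d_images  # source j fills its own slice of the batch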
Example #5
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        '--vgg',
        help=
        'Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    )
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step',
                        default=50,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=5000,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=0,
        type=int,
        help=
        'Evaluate dataset every eval_step, disabled when eval_step <= 0. Default: disabled'
    )
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--num_workers",
                        default=4,
                        type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument(
        "--eval_mode",
        default="test",
        type=str,
        help=
        'Use defined test datasets for periodic evaluation or use a validation split. Default: "test", alternative "val"'
    )
    parser.add_argument(
        "--return_best",
        default=False,
        type=str2bool,
        help=
        "If false (default) tests on the target the last model. If true tests on the target the model with the best performance on the validation set"
    )
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    if not os.path.exists(cfg.OUTPUT_DIR):
        if not args.distributed or (args.distributed
                                    and distributed_util.is_main_process()):
            os.makedirs(cfg.OUTPUT_DIR)

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
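str2bool is assumed to be a helper defined elsewhere in the repository; a common argparse-friendly implementation looks roughly like this (a sketch, not the repo's actual code):

import argparse

def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "1"):
        return True
    if v.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")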