Beispiel #1
0
def prune_and_eval(model, sorted_bn, prune_idx, percent, cfg):
    """Preview the effect of gamma-threshold channel pruning on mAP.

    Evaluates the original model, then zeroes the BN gamma weights of a deep
    copy for every channel whose |gamma| falls below the ``percent`` quantile
    of ``sorted_bn``, and evaluates the copy. The original model is untouched;
    this is a quick estimate, not the final pruned accuracy.

    Args:
        model: detection model with ``backbone.module_list`` Conv-BN blocks.
        sorted_bn: 1-D tensor of all prunable BN gamma values, sorted ascending.
        prune_idx: indices into ``backbone.module_list`` eligible for pruning.
        percent: fraction of channels to prune (0.0 - 1.0).
        cfg: project config passed to ``do_evaluation``.

    Returns:
        The gamma threshold tensor used for pruning.
    """
    print('mAP of the original model is:')
    with torch.no_grad():
        eval_results = do_evaluation(cfg, model, distributed=False)
        print(eval_results[0]['metrics'])
    model_copy = deepcopy(model)
    # Clamp so percent == 1.0 does not index one past the end.
    thre_index = min(int(len(sorted_bn) * percent), len(sorted_bn) - 1)
    # Threshold on the gamma values: channels whose gamma is below it are pruned.
    thre = sorted_bn[thre_index]
    thre = thre.cuda()

    print(f'Channels with Gamma value less than {thre:.4f} are pruned!')
    remain_num = 0
    for idx in prune_idx:
        # module_list[idx][1] is the BatchNorm layer of the Conv-BN block.
        bn_module = model_copy.backbone.module_list[idx][1]
        mask = bn_module.weight.data.abs().ge(thre).float()
        remain_num += int(mask.sum())
        bn_module.weight.data.mul_(mask)
    print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}')
    print(f'Prune ratio: {1 - remain_num / len(sorted_bn):.3f}')
    print('快速看剪枝效果----》')
    print('mAP of the pruned model is:')
    with torch.no_grad():
        eval_results = do_evaluation(cfg, model_copy, distributed=False)
        print(eval_results[0]['metrics'])
    return thre
Beispiel #2
0
def evaluation(cfg, ckpt):
    """Build the SSD detector, restore weights from *ckpt* and evaluate it."""
    log = logging.getLogger("SSD.inference")

    model = SSDDetector(cfg)
    ckpt_manager = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=log)
    model = torch_utils.to_cuda(model)
    # With no explicit checkpoint, fall back to the most recent one on disk.
    ckpt_manager.load(ckpt, use_latest=ckpt is None)
    do_evaluation(cfg, model)
Beispiel #3
0
def evaluation(cfg, ckpt, distributed):
    """Restore a detection model from *ckpt* and run the evaluation suite."""
    log = logging.getLogger("SSD.inference")

    model = build_detection_model(cfg)
    ckpt_manager = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=log)
    model.to(torch.device(cfg.MODEL.DEVICE))
    # With no explicit checkpoint, restore the latest one found on disk.
    ckpt_manager.load(ckpt, use_latest=ckpt is None)
    do_evaluation(cfg, model, distributed)
def main():
    """Entry point: parse CLI args, set up (optionally distributed) training,
    train an SSD model, then evaluate it unless ``--skip-test`` is given."""
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--vgg', help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    parser.add_argument('--resume', default=None, type=str, help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int, help='Print logs every log_step')
    parser.add_argument('--save_step', default=5000, type=int, help='Save checkpoint every save_step')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    # WORLD_SIZE is set by the distributed launcher; absent means single GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if args.distributed:
        # Bind this process to its GPU before joining the NCCL process group.
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # Layer the YAML file and CLI overrides onto the defaults, then lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
Beispiel #5
0
def evaluation(cfg, ckpt, distributed):
    """Build the detection model, restore a checkpoint and run evaluation."""
    log = logging.getLogger("SSD.inference")

    model = build_detection_model(cfg)
    saver = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=log)
    model.to(torch.device(cfg.MODEL.DEVICE))
    # When no checkpoint is named, restore the newest one found on disk.
    saver.load(ckpt, use_latest=ckpt is None)
    do_evaluation(cfg, model, distributed)
Beispiel #6
0
def evaluation(cfg, weights_file, output_dir, distributed):
    """Load an SSD model's weights from *weights_file* and evaluate it."""
    os.makedirs(output_dir, exist_ok=True)
    model = build_ssd_model(cfg)
    model.load(weights_file)
    log = logging.getLogger("SSD.inference")
    log.info('Loaded weights from {}.'.format(weights_file))
    model.to(torch.device(cfg.MODEL.DEVICE))
    do_evaluation(cfg, model, output_dir, distributed)
Beispiel #7
0
def prune_and_eval(model, CBL_idx, CBLidx2mask, cfg):
    """Apply channel masks to a copy of *model* and report mAP before/after.

    The original model is left untouched; the masks in ``CBLidx2mask`` are
    multiplied into the BN gamma weights of a deep copy so the effect of
    pruning can be previewed quickly.

    Args:
        model: detection model with ``backbone.module_list`` Conv-BN blocks.
        CBL_idx: indices of Conv-BN-LeakyReLU blocks to mask.
        CBLidx2mask: mapping from block index to its 0/1 channel mask tensor.
        cfg: project config passed to ``do_evaluation``.
    """
    print(f'mAP of the original model is:')
    with torch.no_grad():
        eval_results = do_evaluation(cfg, model, distributed=False)
        print(eval_results[0]['metrics'])
    model_copy = deepcopy(model)
    for idx in CBL_idx:
        # module_list[idx][1] is the BatchNorm layer of the block.
        bn_module = model_copy.backbone.module_list[idx][1]
        mask = CBLidx2mask[idx].cuda()
        bn_module.weight.data.mul_(mask)
    print('快速看剪枝效果----》')
    print(f'mAP of the pruned model is:')
    with torch.no_grad():
        eval_results = do_evaluation(cfg, model_copy, distributed=False)
        print(eval_results[0]['metrics'])
Beispiel #8
0
 def score(self):
     """Evaluate the current model and return its mAP on the first test set."""
     torch.cuda.empty_cache()
     results = do_evaluation(
         self.cfg, self.model, distributed=self.args.distributed)
     return results[0]['metrics']['mAP']
Beispiel #9
0
def evaluation(cfg, ckpt, distributed, model_path=None):
    """Restore a model and evaluate it, optionally fusing BatchNorm first.

    When ``model_path`` is given its state dict is loaded directly; otherwise
    the checkpointer restores ``ckpt`` (or the latest checkpoint if ``ckpt``
    is None).
    """
    logger = logging.getLogger("SSD.inference")
    model = build_detection_model(cfg)
    # Use the logger rather than print: print would emit twice under multi-GPU.
    logger.info("Model :\n{}".format(model))
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    if model_path is not None:
        model.load_state_dict(torch.load(model_path))
    else:
        checkpointer.load(ckpt, use_latest=ckpt is None)
    if cfg.TEST.BN_FUSE is True:
        print('BN_FUSE.')
        # Fold BatchNorm into the preceding convolutions, then move back to device.
        model.backbone.bn_fuse()
        model.to(device)
    do_evaluation(cfg, model, distributed)
Beispiel #10
0
def evaluation(cfg, ckpt, distributed):
    """Evaluate the checkpointed model at several input rescale factors."""
    logger = logging.getLogger("SSD.inference")

    model = build_detection_model(cfg)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger)
    model.to(torch.device(cfg.MODEL.DEVICE))
    checkpointer.load(ckpt, use_latest=ckpt is None)

    # Sweep 5 evenly spaced rescale factors from 0.5 to 1.0 inclusive.
    for scale in np.linspace(0.5, 1.0, 5):
        logger.info(f"Running eval with rescale factor: {scale}")
        do_evaluation(cfg, model, distributed, rescale=scale)
Beispiel #11
0
def evaluation(cfg, args, weights_file, output_dir, distributed):
    """Load weights and evaluate, either on the test sets or a validation split.

    In ``test`` mode the standard evaluation runs once; otherwise each
    validation dataset is evaluated and the average mAP is printed.
    """
    os.makedirs(output_dir, exist_ok=True)
    device = torch.device(cfg.MODEL.DEVICE)
    model = build_ssd_model(cfg)
    model.load(open(weights_file, 'rb'))
    logger = logging.getLogger("SSD.inference")
    logger.info('Loaded weights from {}.'.format(weights_file))
    model.to(device)

    if args.eval_mode == "test":
        do_evaluation(cfg, model, output_dir, distributed)
        return

    dataset_metrics = do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed, datasets_dict=_create_val_datasets(args, cfg, logger))
    # Average mAP across all validation datasets.
    map_sum = sum(v.info["mAP"] for v in dataset_metrics.values())
    avg_map = map_sum / len(dataset_metrics)
    print("'Model': '{}', 'Avg_mAP': {}".format(weights_file, avg_map))
Beispiel #12
0
def main():
    """Parse CLI arguments, train an SSD model and evaluate the result."""
    args = get_parser().parse_args()
    # Layer the YAML file and CLI overrides onto the defaults, then lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    out_dir = pathlib.Path(cfg.OUTPUT_DIR)
    out_dir.mkdir(exist_ok=True, parents=True)

    logger = setup_logger("SSD", out_dir)
    logger.info(args)

    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as fp:
        logger.info("\n" + fp.read())
    logger.info(f"Running with config:\n{cfg}")

    model = start_train(cfg)

    logger.info('Start evaluating...')
    torch.cuda.empty_cache()  # speed up evaluating after training finished
    do_evaluation(cfg, model)
Beispiel #13
0
def main():
    """Parse CLI arguments, train an SSD model and evaluate the result."""
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "config_file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Layer the YAML file and CLI overrides onto the defaults, then lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    out_dir = pathlib.Path(cfg.OUTPUT_DIR)
    out_dir.mkdir(exist_ok=True, parents=True)

    logger = setup_logger("SSD", out_dir)
    logger.info(args)

    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as fp:
        logger.info("\n" + fp.read())
    logger.info(f"Running with config:\n{cfg}")

    model = start_train(cfg)

    logger.info('Start evaluating...')
    torch.cuda.empty_cache()  # speed up evaluating after training finished
    do_evaluation(cfg, model)
Beispiel #14
0
def do_train(cfg, model,
             data_loader,
             optimizer,
             scheduler,
             device,
             args):
    """Run the SSD training loop over one pass of ``data_loader``.

    Logs reduced losses every ``args.log_step`` iterations, checkpoints every
    ``args.save_step`` iterations (rank 0 only), and optionally evaluates
    every ``args.eval_step`` iterations. Returns the trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()

    # Only rank 0 writes checkpoints and tensorboard summaries.
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()
    for iteration, (images, boxes, labels) in enumerate(data_loader):
        iteration = iteration + 1
        # NOTE(review): scheduler.step() before optimizer.step() is the pre-1.1
        # PyTorch ordering; newer PyTorch expects the opposite — confirm intent.
        scheduler.step()
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        
        #print(images.shape)
        #print(labels.shape)
        #print(boxes.shape)
        optimizer.zero_grad()
        loss_dict = model(images, targets=(boxes, labels))

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        # Backprop uses the local (un-reduced) losses.
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()
        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            # ETA is extrapolated from the average time per iteration so far.
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration,
                                                                          optimizer.param_groups[0]['lr'],
                                                                          time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)

            tic = time.time()

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            _save_model(logger, model, model_path)
        # Do eval during training, to trace mAP changes and see whether performance improved.
        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
            model.train()

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    return model
def main():
    # Parse the command line and read the configuration file.
    '''
    Entry point: parse CLI arguments, set up (optionally distributed)
    training, train the model, then evaluate unless --skip-test is given.

    The config defines the model basics: 20 object classes plus background
    (21 total), a 300x300 input size (images are padded rather than
    distorted), VOC2007+2012 for training and VOC2007 for testing, a maximum
    of 120000 iterations, the learning rate / gamma and the remaining
    hyper-parameters, and the output directory:
    MODEL:
        NUM_CLASSES: 21
    INPUT:
        IMAGE_SIZE: 300
    DATASETS:
        TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
        TEST: ("voc_2007_test", )
    SOLVER:
        MAX_ITER: 120000
        LR_STEPS: [80000, 100000]
        GAMMA: 0.1
        BATCH_SIZE: 32
        LR: 1e-3
    OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712'
    Returns:
    '''
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="configs/vgg_ssd300_voc0712.yaml",
        # default="configs/vgg_ssd300_visdrone0413.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # Save and evaluate every 2500 steps, log every 10 steps; tensorboardX
    # recording can be turned off via --use_tensorboard.
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step')
    parser.add_argument('--save_step', default=2500, type=int, help='Save checkpoint every save_step')
    parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    # Parse arguments; multi-GPU training is supported via the launcher env.
    args = parser.parse_args()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    # Necessary pre-launch checks.
    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Create the model output directory.
    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)

    # Set up logging.
    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # Load and echo the configuration file.
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # Train the model.
    # model = train(cfg, args)
    model = train(cfg, args)

    # Run the final evaluation.
    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, distributed=args.distributed)
Beispiel #16
0
def do_train(cfg, model, data_loader, optimizer, scheduler, checkpointer,
             arguments):
    """Single-process SSD training loop driven by config step constants.

    Logs every ``cfg.LOG_STEP`` iterations, checkpoints every
    ``cfg.MODEL_SAVE_STEP`` and evaluates every ``cfg.EVAL_STEP`` iterations.
    ``arguments["iteration"]`` carries the resume point and is kept current
    so checkpoints can resume mid-run. Returns the trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()

    summary_writer = torch.utils.tensorboard.SummaryWriter(
        log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))

    max_iter = len(data_loader)
    # Resume from the iteration stored in the checkpoint arguments.
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration
        images = torch_utils.to_cuda(images)
        targets = torch_utils.to_cuda(targets)
        loss_dict = model(images, targets=targets)
        # Total loss is the sum of all individual loss terms.
        loss = sum(loss for loss in loss_dict.values())

        meters.update(total_loss=loss, **loss_dict)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % cfg.LOG_STEP == 0:
            # ETA from the global average time per iteration.
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                meters.delimiter.join([
                    "iter: {iter:06d}",
                    "lr: {lr:.5f}",
                    '{meters}',
                    "eta: {eta}",
                    'mem: {mem}M',
                ]).format(iter=iteration,
                          lr=optimizer.param_groups[0]['lr'],
                          meters=str(meters),
                          eta=eta_string,
                          mem=round(torch.cuda.max_memory_allocated() /
                                    1024.0 / 1024.0)))
            global_step = iteration
            summary_writer.add_scalar('losses/total_loss',
                                      loss,
                                      global_step=global_step)
            for loss_name, loss_item in loss_dict.items():
                summary_writer.add_scalar('losses/{}'.format(loss_name),
                                          loss_item,
                                          global_step=global_step)
            summary_writer.add_scalar('lr',
                                      optimizer.param_groups[0]['lr'],
                                      global_step=global_step)

        if iteration % cfg.MODEL_SAVE_STEP == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if cfg.EVAL_STEP > 0 and iteration % cfg.EVAL_STEP == 0:
            eval_results = do_evaluation(cfg, model, iteration=iteration)
            # One result per test dataset; write each mAP set to tensorboard.
            for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
                write_metric(eval_result['metrics'], 'metrics/' + dataset,
                             summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #17
0
def main():
    """Entry point: parse CLI args, set up (optionally distributed) training,
    train an SSD model, then evaluate it unless ``--skip-test`` is given."""
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        '--vgg',
        help=
        'Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    )
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step',
                        default=50,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=5000,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=0,
        type=int,
        help=
        'Evaluate dataset every eval_step, disabled when eval_step <= 0. Default: disabled'
    )
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--num_workers",
                        default=4,
                        type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument(
        "--eval_mode",
        default="test",
        type=str,
        help=
        'Use defined test datasets for periodic evaluation or use a validation split. Default: "test", alternative "val"'
    )
    parser.add_argument(
        "--return_best",
        default=False,
        type=str2bool,
        help=
        "If false (default) tests on the target the last model. If true tests on the target the model with the best performance on the validation set"
    )
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    # WORLD_SIZE is set by the distributed launcher; absent means single GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        # Bind this process to its GPU before joining the NCCL process group.
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # Layer the YAML file and CLI overrides onto the defaults, then lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # Only the main process creates the output directory to avoid races.
    if not os.path.exists(cfg.OUTPUT_DIR):
        if not args.distributed or (args.distributed
                                    and distributed_util.is_main_process()):
            os.makedirs(cfg.OUTPUT_DIR)

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
Beispiel #18
0
def do_train(cfg, model, data_loader, optimizer, scheduler, checkpointer,
             device, arguments, args):
    """Epoch-based SSD training loop (fixed 10 epochs over ``data_loader``).

    Logs every ``args.log_step`` iterations, checkpoints every
    ``args.save_step`` and evaluates every ``args.eval_step`` iterations.
    Returns the trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()
    # Only rank 0 writes checkpoints and tensorboard summaries.
    save_to_disk = dist_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(
            log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    max_epoch = 10
    for epoch in range(max_epoch):
        logger.info('epoch: {}'.format(epoch))
        # NOTE(review): the increment below is commented out, so `iteration`
        # keeps the raw enumerate value and restarts at `start_iter` every
        # epoch — the log/save/eval step counters repeat across epochs. Confirm
        # this is intended.
        for iteration, (images, targets,
                        _) in enumerate(data_loader, start_iter):
            # print("imgs shape:  ",images.shape,iteration)
            # continue
            # iteration = iteration + 1
            arguments["iteration"] = iteration
            # NOTE(review): scheduler.step() before optimizer.step() is the
            # pre-1.1 PyTorch ordering; newer versions expect the opposite.
            scheduler.step()

            images = images.to(device)
            targets = targets.to(device)
            loss_dict = model(images, targets=targets)
            # Backprop uses the local (un-reduced) losses.
            loss = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(total_loss=losses_reduced, **loss_dict_reduced)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time)

            # log step
            if iteration % args.log_step == 0:
                # ETA from the global average time per iteration.
                eta_seconds = meters.time.global_avg * (max_iter - iteration)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                logger.info(
                    meters.delimiter.join([
                        "iter: {iter:06d}",
                        "lr: {lr:.5f}",
                        '{meters}',
                        "eta: {eta}",
                        'mem: {mem}M',
                    ]).format(
                        iter=iteration,
                        lr=optimizer.param_groups[0]['lr'],
                        meters=str(meters),
                        eta=eta_string,
                        mem=round(torch.cuda.max_memory_allocated() / 1024.0 /
                                  1024.0),
                    ))
                if summary_writer:
                    global_step = iteration
                    summary_writer.add_scalar('losses/total_loss',
                                              losses_reduced,
                                              global_step=global_step)
                    for loss_name, loss_item in loss_dict_reduced.items():
                        summary_writer.add_scalar(
                            'losses/{}'.format(loss_name),
                            loss_item,
                            global_step=global_step)
                    summary_writer.add_scalar('lr',
                                              optimizer.param_groups[0]['lr'],
                                              global_step=global_step)

            # save step
            if iteration % args.save_step == 0:
                checkpointer.save("model_{:06d}".format(iteration),
                                  **arguments)

            # eval step
            if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
                # if True:
                eval_results = do_evaluation(cfg,
                                             model,
                                             distributed=args.distributed,
                                             iteration=iteration)
                # Only rank 0 writes the per-dataset metrics to tensorboard.
                if dist_util.get_rank() == 0 and summary_writer:
                    for eval_result, dataset in zip(eval_results,
                                                    cfg.DATASETS.TEST):
                        write_metric(eval_result['metrics'],
                                     'metrics/' + dataset, summary_writer,
                                     iteration)
                model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #19
0
def shortcut_prune(cfg, model, pruned_cfg, file, max, percent, quick,
                   weight_path):
    """Channel-prune a backbone that contains shortcut connections, then
    (optionally) rebuild, evaluate and save a compact model.

    Args:
        cfg: global config node; ``MODEL.BACKBONE.OUT_CHANNELS``, ``.CFG`` and
            ``.PRETRAINED`` are mutated in place to describe the pruned net.
        model: trained (sparsity-regularized) detection model to prune.
        pruned_cfg: path where the pruned backbone cfg file is written.
        file: open text file the compression summary is appended to
            (closed by this function).
        max: 1 to additionally drop every layer after the first dead
            predictor branch ("maximum" pruning); 0 for channel pruning only.
        percent: fraction of globally-sorted BN gammas to prune.
        quick: 1 to only merge masks and report; 0 to actually rebuild,
            evaluate and save the compact model.
        weight_path: destination for the compact model's ``state_dict``.

    NOTE(review): parameters ``max`` and ``file`` (and the local ``eval``
    below) shadow Python builtins; kept as-is to preserve the interface.
    """
    obtain_num_parameters = lambda model: sum(
        [param.nelement() for param in model.parameters()])
    origin_nparameters = obtain_num_parameters(model)
    origin_size = model_size(model)
    # The shortcut-aware pruning scheme follows:
    # https://github.com/SpursLipu/YOLOv3-ModelCompression-MultidatasetTraining-Multibackbone/blob/4516d76ba89b561983babd679543135484e7e9ac/slim_prune.py
    CBL_idx, Conv_idx, prune_idx, _, _ = parse_module_defs(
        model.backbone.module_defs)

    # Copy the gamma (BN scale) parameters of all prunable BN layers into one
    # flat tensor so a single global threshold can be computed.
    bn_weights = gather_bn_weights(model.backbone.module_list, prune_idx)
    # torch.sort returns (sorted_values, indices); only the values are needed.
    sorted_bn = torch.sort(bn_weights)[0]
    thresh_index = int(len(bn_weights) * percent)
    thresh = sorted_bn[thresh_index].cuda()

    print(f'Global Threshold should be less than {thresh:.9f}.')

    predictor_channels = list(cfg.MODEL.BACKBONE.OUT_CHANNELS)
    # Number of kept filters and per-layer masks, plus the (possibly reduced)
    # channel count feeding each predictor head.
    num_filters, filters_mask, predictor_channels = obtain_filters_mask(
        model.backbone, thresh, CBL_idx, prune_idx, predictor_channels, max)
    # CBLidx2mask maps each CBL layer index to its BN channel mask.
    CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
    CBLidx2filters = {
        idx: filters
        for idx, filters in zip(CBL_idx, num_filters)
    }
    # Reset the visited flag used by merge_mask when walking shortcut chains.
    for i in model.backbone.module_defs:
        if i['type'] == 'shortcut':
            i['is_access'] = False
    print('merge the mask of layers connected to shortcut!')
    # Layers joined by a shortcut must keep identical channel masks.
    merge_mask(model.backbone, CBLidx2mask, CBLidx2filters)

    prune_and_eval(model, CBL_idx, CBLidx2mask, cfg)
    for i in CBLidx2mask:
        CBLidx2mask[i] = CBLidx2mask[i].clone().cpu().numpy()
    if quick == 0:
        print('实际剪枝---》')
        pruned_model = prune_model_keep_size(cfg, model, prune_idx, CBL_idx,
                                             CBLidx2mask)
        if max == 0:
            with torch.no_grad():
                eval = do_evaluation(cfg, pruned_model, distributed=False)
            print('after prune_model_keep_size mAP is {}'.format(
                eval[0]['metrics']))  # inaccurate for max-pruning: trailing layers not cut yet

        # Copy the original module_defs and patch in the pruned filter counts.
        compact_module_defs = deepcopy(model.backbone.module_defs)
        prune_after = -1  # CBL index after which all layers are dropped (max pruning only): everything past the predictor nearest the first all-zero-BN layer is removed
        if max == 1:
            new_predictor_channels = []
            for idx in CBL_idx:
                if model.backbone.module_defs[idx][
                        'feature'] == 'linear' or model.backbone.module_defs[
                            idx]['feature'] == 'l2_norm':
                    i = int(model.backbone.module_defs[idx]['feature_idx'])
                    if predictor_channels[i] != -1:
                        new_predictor_channels.append(predictor_channels[i])
                        if i + 1 < len(predictor_channels):
                            if predictor_channels[i + 1] == -1:
                                prune_after = idx
                                break
                    if i + 1 == len(predictor_channels):
                        break
                elif model.backbone.module_defs[idx + 1][
                        'type'] == 'shortcut' and model.backbone.module_defs[
                            idx + 1]['feature'] == 'linear':
                    i = int(model.backbone.module_defs[idx + 1]['feature_idx'])
                    new_predictor_channels.append(
                        predictor_channels[i])  # the first shortcut feeding a head is never cut
            predictor_channels = new_predictor_channels

        for idx, num in zip(CBL_idx, num_filters):
            assert compact_module_defs[idx]['type'] == 'convolutional'
            if idx == prune_after + 1 and prune_after != -1:
                compact_module_defs[idx]['filters'] = '-1'  # this layer and everything after it are dropped
                break
            else:
                compact_module_defs[idx]['filters'] = str(num)

        write_cfg(pruned_cfg, compact_module_defs)
        print(f'Config file has been saved: {pruned_cfg}')
        cfg.MODEL.BACKBONE.OUT_CHANNELS = tuple(predictor_channels)
        print(
            f'PRUNED_MODEL.BACKBONE.OUT_CHANNELS:{cfg.MODEL.BACKBONE.OUT_CHANNELS}'
        )
        cfg.MODEL.BACKBONE.CFG = pruned_cfg
        cfg.MODEL.BACKBONE.PRETRAINED = False  # pretrained weights no longer match the pruned channel counts, so skip loading them
        compact_model = build_detection_model(cfg)
        # print(compact_model)
        device = torch.device(cfg.MODEL.DEVICE)
        compact_model.to(device)
        # Copy the surviving weights from the loose (masked) model into the
        # freshly-built compact model.
        init_weights_from_loose_model(compact_model, pruned_model, CBL_idx,
                                      Conv_idx, CBLidx2mask, prune_after)
        compact_nparameters = obtain_num_parameters(compact_model)
        compact_size = model_size(compact_model)
        random_input = torch.rand(
            (16, 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)).to(device)
        pruned_forward_time = obtain_avg_forward_time(random_input,
                                                      pruned_model)
        compact_forward_time = obtain_avg_forward_time(random_input,
                                                       compact_model)
        # print(compact_model)
        # print(compact_model)
        with torch.no_grad():
            eval = do_evaluation(cfg, compact_model, distributed=False)
        print('Final pruned model mAP is {}'.format(eval[0]['metrics']))
        metric_table = [
            ["Metric", "Before", "After"],
            [
                "Parameters(M)", f"{origin_nparameters/(1024*1024)}",
                f"{compact_nparameters/(1024*1024)}"
            ],
            ["模型体积(MB)", f"{origin_size}", f"{compact_size}"],
            [
                "Inference(ms)", f'{pruned_forward_time*1000/16:.4f}',
                f'{compact_forward_time*1000/16:.4f}'
            ]  # timings are per-image for a batch of 16
        ]
        print(AsciiTable(metric_table).table)
        print(
            f'压缩率:{(origin_nparameters-compact_nparameters)/origin_nparameters}'
        )
        file.write(
            f'PRUNED_MODEL.BACKBONE.OUT_CHANNELS:{cfg.MODEL.BACKBONE.OUT_CHANNELS}'
            + '\n')
        file.write(AsciiTable(metric_table).table + '\n')
        file.write(
            f'压缩率:{(origin_nparameters-compact_nparameters)/origin_nparameters}'
            + '\n')
        file.close()

        torch.save(compact_model.state_dict(), weight_path)
        print(f'Compact model has been saved.')
Beispiel #20
0
def do_train(
    cfg: CfgNode,
    model: SSDDetector,
    data_loader: DataLoader,
    optimizer: SGD,
    scheduler: MultiStepLR,
    checkpointer,
    device: device,
    arguments,
    args: Namespace,
    output_dir: Path,
    model_manager: Dict[str, Any],
) -> SSDDetector:
    """Main SSD training loop with periodic logging, checkpointing and eval.

    Iterates once over ``data_loader`` (this project counts iterations, not
    epochs) and, every ``args.log_step`` / ``args.save_step`` /
    ``args.eval_step`` iterations respectively, logs metrics (optionally to
    tensorboard), saves a checkpoint, and runs evaluation while tracking the
    best model via ``model_manager``.

    ``arguments["iteration"]`` is kept up to date so training can resume from
    a checkpoint.  Returns the trained model.

    Fix vs. the previous version: the total loss was summed from
    ``loss_dict`` twice per step (the first result was dead); it is now
    computed once.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()
    save_to_disk = dist_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(logdir=output_dir / "logs")
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()

    logger.info("MAX_ITER: {}".format(max_iter))

    # GB: 2019-09-08:
    # For rescaling tests, do an eval before fine-tuning-training, so we know
    # what the eval results are before any weights are updated:
    # do_evaluation(
    #     cfg,
    #     model,
    #     distributed=args.distributed,
    #     iteration=0,
    # )
    # model.train()  # *IMPORTANT*: change to train mode after eval.

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        # TODO: Print learning rate:
        iteration = iteration + 1
        arguments["iteration"] = iteration
        # NOTE(review): scheduler.step() runs before optimizer.step(); since
        # PyTorch 1.1 the recommended order is the reverse — confirm intended.
        scheduler.step()

        images = images.to(device)
        targets = targets.to(device)
        loss_dict = model(images, targets=targets)
        # Total loss for the backward pass (computed once).
        loss = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % args.log_step == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                meters.delimiter.join([
                    "iter: {iter:06d}",
                    "lr: {lr:.5f}",
                    "{meters}",
                    "eta: {eta}",
                    "mem: {mem}M",
                ]).format(
                    iter=iteration,
                    lr=optimizer.param_groups[0]["lr"],
                    meters=str(meters),
                    eta=eta_string,
                    mem=round(torch.cuda.max_memory_allocated() / 1024.0 /
                              1024.0),
                ))
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar("losses/total_loss",
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar(
                        "losses/{}".format(loss_name),
                        loss_item,
                        global_step=global_step,
                    )
                summary_writer.add_scalar("lr",
                                          optimizer.param_groups[0]["lr"],
                                          global_step=global_step)

        # This project doesn't use epochs, it does something with batch samplers
        # instead, so there is only a concept of "iteration". For now hardcode epoch as
        # zero to put into file name:
        epoch = 0
        save_name = f"ssd{cfg.INPUT.IMAGE_SIZE}-vgg_{cfg.DATASETS.TRAIN[0]}_0_{epoch}_{iteration:06d}"
        model_path = Path(output_dir) / f"{save_name}.pth"

        # save step
        if iteration % args.save_step == 0:
            checkpointer.save(save_name, **arguments)

        # Do eval when training, to trace the mAP changes and see performance improved
        # whether or nor
        if (args.eval_step > 0 and iteration % args.eval_step == 0
                and not iteration == max_iter):
            eval_results = do_evaluation(
                cfg,
                model,
                distributed=args.distributed,
                iteration=iteration,
            )
            # Keep only the best-so-far checkpoint according to eval metrics.
            do_best_model_checkpointing(cfg, model_path, eval_results,
                                        model_manager, logger)
            if dist_util.get_rank() == 0 and summary_writer:
                for eval_result, dataset in zip(eval_results,
                                                cfg.DATASETS.TEST):
                    write_metric(
                        eval_result["metrics"],
                        "metrics/" + dataset,
                        summary_writer,
                        iteration,
                    )
            model.train()  # *IMPORTANT*: change to train mode after eval.

        if iteration % args.save_step == 0:
            remove_extra_checkpoints(output_dir, [model_path], logger)

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #21
0
def do_train(cfg, model,
             data_loader,
             optimizer,
             scheduler,
             device,
             args,
             val_sets_dict=None):
    """Training loop supporting domain generalization (multi-source batches)
    and an optional self-supervised auxiliary task.

    When ``cfg.DATASETS.DG`` is set, ``data_loader`` is a list of per-domain
    loaders and each step concatenates one batch from every domain.  Losses
    are backpropagated through ``amp.scale_loss`` (mixed precision).  When
    ``cfg.MODEL.SELF_SUPERVISED`` is set, a second forward/backward pass runs
    the auxiliary task on jigsaw-style inputs, weighted by
    ``cfg.MODEL.SELF_SUPERVISOR.WEIGHT``.  If ``args.return_best`` the
    checkpoint with the best validation average mAP is reloaded before
    returning.  Returns the trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training")
    model.train()
    save_to_disk = distributed_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR)
        # NOTE(review): tf_writer is only referenced by the commented-out
        # _log_network_params call further down — presumably kept for debugging.
        tf_writer = tf.compat.v1.summary.FileWriter(cfg.OUTPUT_DIR)
    else:
        summary_writer = None


    if cfg.DATASETS.DG:
        # Domain generalization: one loader per source domain; iterate them in
        # lockstep and assume they all have the length of the first loader.
        dataloaders = data_loader
        max_iter = len(data_loader[0])
        dataiters = [iter(dataloader) for dataloader in dataloaders]
    else:
        max_iter = len(data_loader)
        data_loader = iter(data_loader)

    start_training_time = time.time()
    trained_time = 0
    tic = time.time()
    end = time.time()

    if args.return_best:
        best_map = 0
    
    for iteration in range(scheduler.last_epoch, max_iter):
        if cfg.DATASETS.DG:
            # domain generalization settings
            # we need to read images from different sources
            images = torch.ones(cfg.SOLVER.BATCH_SIZE * len(dataloaders), 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)

            for j in range(len(dataloaders)):
                if cfg.MODEL.SELF_SUPERVISED:
                    d_images, d_boxes, d_labels, d_j_images, d_j_index, d_orig_boxes, d_orig_labels = next(dataiters[j])
                else:
                    d_images, d_boxes, d_labels, d_orig_boxes, d_orig_labels = next(dataiters[j])

                # Each domain fills its own BATCH_SIZE-sized slice of the batch.
                start_bs = cfg.SOLVER.BATCH_SIZE * j
                end_bs = start_bs + cfg.SOLVER.BATCH_SIZE

                images[start_bs:end_bs, :, :, :] = d_images

                if j == 0:
                    boxes = d_boxes
                    labels = d_labels
                    orig_boxes = d_orig_boxes
                    orig_labels = d_orig_labels

                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = d_j_images
                        j_index = d_j_index
                else:
                    boxes = torch.cat((boxes, d_boxes))
                    labels = torch.cat((labels, d_labels))
                    orig_boxes = torch.cat((orig_boxes, d_orig_boxes))
                    orig_labels = torch.cat((orig_labels, d_orig_labels))

                    if cfg.MODEL.SELF_SUPERVISED:
                        j_images = torch.cat((j_images, d_j_images))
                        j_index = torch.cat((j_index, d_j_index))
        else:
            if cfg.MODEL.SELF_SUPERVISED:
                images, boxes, labels, j_images, j_index, orig_boxes, orig_labels = next(data_loader)
            else:
                images, boxes, labels, orig_boxes, orig_labels = next(data_loader)

        # it is not a problem if we increment iteration because it will be reset in the loop
        iteration = iteration + 1

        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss_dict = model(images, targets=(boxes, labels))
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss = sum(loss for loss in loss_dict.values())

        # loss.backward() becomes:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        if cfg.MODEL.SELF_SUPERVISED:
            # Second pass: auxiliary self-supervised task on the jigsaw inputs.
            j_images = j_images.to(device)
            j_index = j_index.to(device)
            loss_dict_j = model(j_images, targets=j_index, auxiliary_task=True)
            loss_dict_reduced_j = reduce_loss_dict(loss_dict_j)
            losses_reduced_j = sum(loss for loss in loss_dict_reduced_j.values())
            loss_j = sum(loss for loss in loss_dict_j.values())
            # apply reduction factor for auxiliary loss
            loss_j = loss_j * cfg.MODEL.SELF_SUPERVISOR.WEIGHT

            # loss.backward() becomes:
            with amp.scale_loss(loss_j, optimizer) as scaled_loss:
                scaled_loss.backward()

            # append this loss to the dictionary of losses
            loss_dict.update(loss_dict_j)
            losses_reduced += losses_reduced_j

        optimizer.step()
        scheduler.step()

        trained_time += time.time() - end
        end = time.time()
        if iteration % args.log_step == 0:
            eta_seconds = int((trained_time / iteration) * (max_iter - iteration))
            log_str = [
                "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration,
                                                                          optimizer.param_groups[0]['lr'],
                                                                          time.time() - tic,
                                                                          str(datetime.timedelta(seconds=eta_seconds))),
                "total_loss: {:.3f}".format(losses_reduced.item())
            ]
            for loss_name, loss_item in loss_dict_reduced.items():
                log_str.append("{}: {:.3f}".format(loss_name, loss_item.item()))
            log_str = ', '.join(log_str)
            logger.info(log_str)
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
                summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)

                if cfg.MODEL.SELF_SUPERVISED:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer, j_images=j_images)
                else:
                    _log_images_tensorboard(cfg, global_step, images, orig_boxes, orig_labels, summary_writer)
                #for tag, value in model.named_parameters():
                #    tag = tag.replace('.', '/')
                #    if 'ss_classifier' in tag:
                #        print(tag, value)
                #_log_network_params(tf_writer, model, global_step)

            tic = time.time()

        if save_to_disk and iteration % args.save_step == 0:
            model_path = os.path.join(cfg.OUTPUT_DIR,
                                      "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration))
            save_training_checkpoint(logger, model, scheduler, optimizer, model_path)
        # Do eval when training, to trace the mAP changes and see whether or not performance improved
        # if args.return_best = True the model returned should be the one that gave best performances on the val set
        if args.eval_step > 0 and iteration % args.eval_step == 0 and (not iteration == max_iter or args.return_best):
            dataset_metrics = do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed, datasets_dict=val_sets_dict)
            
            model.train()

            # Non-main ranks skip best-model tracking and tensorboard logging.
            if args.distributed and not distributed_util.is_main_process():
                continue
            
            avg_map = _compute_avg_map(dataset_metrics)
            
            if args.return_best:

                if avg_map > best_map:
                    best_map = avg_map
                    logger.info("With iteration {} passed the best! New best avg map: {:4f}".format(iteration, best_map))
                    model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE))
                    _save_model(logger, model, model_path)
                else:
                    logger.info("With iteration {} the best has not been reached. Best avg map: {:4f}, Current avg mAP: {:4f}".format(iteration, best_map, avg_map))
                
            # logging
            if summary_writer:
                global_step = iteration

                summary_writer.add_scalar("val_avg_map", avg_map, global_step=global_step)

                for dataset_name, metrics in dataset_metrics.items():
                    for metric_name, metric_value in metrics.get_printable_metrics().items():
                        summary_writer.add_scalar('/'.join(['val', dataset_name, metric_name]), metric_value,
                                                  global_step=global_step)

    if save_to_disk:
        model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE))
        _save_model(logger, model, model_path)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
    if args.return_best:
        # NOTE(review): assumes the "best" checkpoint was written at least
        # once (i.e. eval_step triggered during training) — otherwise this
        # load will fail.
        model.load(os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_best.pth".format(cfg.INPUT.IMAGE_SIZE)))
    return model
Beispiel #22
0
def main():
    """Entry point: parse CLI options, configure (optionally distributed)
    training, run SSD training, and — unless --skip-test — evaluate the
    final model and record the best checkpoint."""
    parser = ArgumentParser(
        description="Single Shot MultiBox Detector Training With PyTorch")
    parser.add_argument(
        "--config-file",
        type=str,
        default="",
        metavar="FILE",
        help="config file name or path (relative to the configs/ folder) ",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--log_step", type=int, default=50,
                        help="Print logs every log_step")
    parser.add_argument("--save_step", type=int, default=5000,
                        help="Save checkpoint every save_step")
    parser.add_argument(
        "--eval_step", type=int, default=5000,
        help="Evaluate dataset every eval_step, disabled when eval_step < 0",
    )
    parser.add_argument("--use_tensorboard", type=str2bool, default=True)
    parser.add_argument(
        "--skip-test", dest="skip_test", action="store_true",
        help="Do not test the final model",
    )
    parser.add_argument(
        "opts", nargs=REMAINDER, default=None,
        help="Modify config options using the command-line",
    )
    parser.add_argument(
        "--resume_experiment", dest="resume", type=str, default="None",
        help="Checkpoint state_dict file to resume training from",
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; default to 1 GPU.
    world_size = int(os.environ.get("WORLD_SIZE", "1"))
    args.distributed = world_size > 1
    args.num_gpus = world_size

    if torch.cuda.is_available():
        # Let the inbuilt cudnn auto-tuner pick the fastest algorithms
        # for this hardware.
        torch.backends.cudnn.benchmark = True
    else:
        cfg.MODEL.DEVICE = "cpu"
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    eman = ExperimentManager("ssd")
    output_dir = eman.get_output_dir()

    # Config files are resolved relative to the configs/ folder next to
    # this script.
    args.config_file = str(
        Path(__file__).parent / "configs" / args.config_file)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.OUTPUT_DIR = str(output_dir)
    cfg.freeze()

    eman.start({"cfg": cfg, "args": vars(args)})
    # We use our own output dir, set by ExperimentManager:
    # if cfg.OUTPUT_DIR:
    #     mkdir(cfg.OUTPUT_DIR)

    logger = setup_logger("SSD", dist_util.get_rank(), output_dir / "logs")
    logger.info(f"Using {world_size} GPUs")
    logger.info(args)
    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as cf:
        logger.info("\n" + cf.read())
    logger.info(f"Running with config:\n{cfg}")
    logger.info(f"Output dir: {output_dir}")

    model_manager = {"best": None, "new": None}
    model = train(cfg, args, output_dir, model_manager)

    if not args.skip_test:
        logger.info("Start evaluating...")
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        eval_results = do_evaluation(
            cfg,
            model,
            distributed=args.distributed,
        )
        do_best_model_checkpointing(
            cfg,
            output_dir / "model_final.pth",
            eval_results,
            model_manager,
            logger,
            is_final=True,
        )

    eman.mark_dir_if_complete()
Beispiel #23
0
def main():
    """Entry point: parse CLI options, configure (optionally distributed)
    training, train the distance-regression network and the SSD model, then
    evaluate the final model unless --skip-test is given."""
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        type=str,
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--log_step', type=int, default=10,
                        help='Print logs every log_step')
    parser.add_argument('--save_step', type=int, default=2500,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step', type=int, default=2500,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', type=str2bool, default=True)
    parser.add_argument(
        "--skip-test", dest="skip_test", action="store_true",
        help="Do not test the final model",
    )
    parser.add_argument(
        "opts", nargs=argparse.REMAINDER, default=None,
        help="Modify config options using the command-line",
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; default to 1 GPU.
    world_size = int(os.environ.get("WORLD_SIZE", "1"))
    args.distributed = world_size > 1
    args.num_gpus = world_size

    if torch.cuda.is_available():
        # Let the inbuilt cudnn auto-tuner pick the fastest algorithms
        # for this hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Train distance regression network
    train_distance_regr()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)

    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info(f"Using {world_size} GPUs")
    logger.info(args)

    logger.info(f"Loaded configuration file {args.config_file}")
    with open(args.config_file, "r") as cf:
        logger.info("\n" + cf.read())
    logger.info(f"Running with config:\n{cfg}")

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, distributed=args.distributed)
Beispiel #24
0
def do_train(cfg, model, data_loader, optimizer, scheduler, checkpointer,
             device, arguments, args):
    """Training loop with optional BN-gamma sparsity regularization used to
    prepare the network for channel pruning.

    When ``cfg.PRUNE.TYPE`` is 'normal' or 'shortcut', the prunable BN layer
    indices are computed once up front and, after every backward pass,
    ``bn_sparse.updateBN`` sparsifies those layers' gamma parameters using the
    sparsity rate ``cfg.PRUNE.SR`` before the optimizer step.  Checkpoints
    every ``args.save_step`` iterations and evaluates every
    ``args.eval_step`` iterations (single GPU).  Returns the trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    # Determine which layers are eligible for pruning.
    if cfg.PRUNE.TYPE != 'no':
        # Unwrap DataParallel/DistributedDataParallel if needed.
        if hasattr(model, 'module'):
            backbone = model.module.backbone
        else:
            backbone = model.backbone
        if cfg.PRUNE.TYPE == 'normal':
            logger.info('normal sparse training')
            _, _, prune_idx = normal_prune.parse_module_defs(
                backbone.module_defs)
        elif cfg.PRUNE.TYPE == 'shortcut':
            logger.info('shortcut sparse training')
            _, _, prune_idx, _, _ = shortcut_prune.parse_module_defs(
                backbone.module_defs)

    model.train()
    save_to_disk = dist_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        try:
            from torch.utils.tensorboard import SummaryWriter
        except ImportError:
            from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(
            log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = targets.to(device)
        loss_dict = model(images, targets=targets)
        loss = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        loss.backward()

        # Sparsify the gamma parameters of the layers marked for pruning
        # (applied to the gradients between backward() and optimizer.step()).
        if cfg.PRUNE.TYPE != 'no':
            if hasattr(model, 'module'):
                bn_sparse.updateBN(model.module.backbone.module_list,
                                   cfg.PRUNE.SR, prune_idx)
            else:
                # print(model.backbone.module_list)
                bn_sparse.updateBN(model.backbone.module_list, cfg.PRUNE.SR,
                                   prune_idx)

        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % args.log_step == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                meters.delimiter.join([
                    "iter: {iter:06d}",
                    "lr: {lr:.5f}",
                    '{meters}',
                    "eta: {eta}",
                    'mem: {mem}M',
                ]).format(
                    iter=iteration,
                    lr=optimizer.param_groups[0]['lr'],
                    meters=str(meters),
                    eta=eta_string,
                    mem=round(torch.cuda.max_memory_allocated() / 1024.0 /
                              1024.0),
                ))
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss',
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name),
                                              loss_item,
                                              global_step=global_step)
                summary_writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          global_step=global_step)

        if iteration % args.save_step == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            eval_results = do_evaluation(cfg,
                                         model,
                                         distributed=False,
                                         iteration=iteration)  # single-GPU evaluation
            if dist_util.get_rank() == 0 and summary_writer:
                for eval_result, dataset in zip(eval_results,
                                                cfg.DATASETS.TEST):
                    write_metric(eval_result['metrics'], 'metrics/' + dataset,
                                 summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #25
0
def do_train(cfg, model, data_loader, optimizer, checkpointer, arguments,
             scheduler):
    """Train ``model`` with automatic mixed precision (AMP).

    Runs the training loop over ``data_loader`` starting from
    ``arguments["iteration"]``, logs losses/LR to TensorBoard, checkpoints
    every ``cfg.MODEL_SAVE_STEP`` iterations, evaluates every
    ``cfg.EVAL_STEP`` iterations, and stops at ``cfg.SOLVER.MAX_ITER``.

    Args:
        cfg: config node providing OUTPUT_DIR, LOG_STEP, MODEL_SAVE_STEP,
            EVAL_STEP, DATASETS.TEST and SOLVER.MAX_ITER.
        model: detector returning a dict of losses when given targets.
        data_loader: yields ``(images, targets, _)`` batches.
        optimizer: optimizer; ``param_groups[0]['lr']`` is logged.
        checkpointer: saves periodic and final checkpoints.
        arguments: mutable dict; ``"iteration"`` is read for resume and
            updated every step.
        scheduler: LR scheduler, stepped once per iteration after warm-up.

    Returns:
        The trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()

    summary_writer = torch.utils.tensorboard.SummaryWriter(
        log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    # GradScaler scales the loss so fp16 gradients do not underflow.
    scaler = torch.cuda.amp.GradScaler()
    print(model)
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration
        images = torch_utils.to_cuda(images)
        targets = torch_utils.to_cuda(targets)

        # Casts operations to mixed precision.
        # BUGFIX: pass fp32 images and let autocast cast per-op. The old
        # explicit images.half() forced fp16 inputs even into ops autocast
        # deliberately keeps in fp32, which can destabilize training.
        with torch.cuda.amp.autocast():
            loss_dict = model(images, targets=targets)
            loss = sum(loss for loss in loss_dict.values())

        meters.update(total_loss=loss, **loss_dict)

        optimizer.zero_grad()
        # Scales the loss, and calls backward() to create scaled gradients.
        scaler.scale(loss).backward()
        # Unscales gradients, then calls or skips optimizer.step()
        # (the step is skipped when inf/NaN gradients are detected).
        scaler.step(optimizer)
        # Updates the scale for next iteration.
        scaler.update()
        # Hard-coded warm-up: the scheduler only starts after 5000 iterations.
        if iteration > 5000:
            scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % cfg.LOG_STEP == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            lr = optimizer.param_groups[0]['lr']
            to_log = [
                f"iter: {iteration:06d}",
                f"lr: {lr:.5f}",
                f'{meters}',
                f"eta: {eta_string}",
            ]
            if torch.cuda.is_available():
                mem = round(torch.cuda.max_memory_allocated() / 1024.0 /
                            1024.0)
                to_log.append(f'mem: {mem}M')
            logger.info(meters.delimiter.join(to_log))
            global_step = iteration
            summary_writer.add_scalar('losses/total_loss',
                                      loss,
                                      global_step=global_step)
            for loss_name, loss_item in loss_dict.items():
                summary_writer.add_scalar('losses/{}'.format(loss_name),
                                          loss_item,
                                          global_step=global_step)
            summary_writer.add_scalar('lr',
                                      optimizer.param_groups[0]['lr'],
                                      global_step=global_step)

        if iteration % cfg.MODEL_SAVE_STEP == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if cfg.EVAL_STEP > 0 and iteration % cfg.EVAL_STEP == 0:
            eval_results = do_evaluation(cfg, model, iteration=iteration)
            for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
                write_metric(eval_result['metrics'], 'metrics/' + dataset,
                             summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

        if iteration >= cfg.SOLVER.MAX_ITER:
            break

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #26
0
def do_train(cfg, model, data_loader, optimizer, scheduler, checkpointer,
             device, arguments, args):
    """Train ``model`` while supplying an extra per-pixel class-mask target.

    For every batch a ``(B, 81, 64, 64)`` one-hot mask is built from the
    normalized ground-truth boxes/labels (channel 0 = background) and passed
    to the model alongside the regular detection targets.

    Args:
        cfg: config node (OUTPUT_DIR, DATASETS.TEST, ...).
        model: detector; called as ``model(images, targets=(targets, mask_t))``.
        data_loader: yields ``(images, targets, _, boxes_norm, labels_norm)``.
        optimizer: optimizer stepped once per iteration.
        scheduler: LR scheduler stepped once per iteration.
        checkpointer: saves periodic and final checkpoints.
        device: target device for the batch tensors.
        arguments: mutable dict carrying the resume ``"iteration"`` counter.
        args: namespace with log_step, save_step, eval_step, distributed,
            use_tensorboard.

    Returns:
        The trained model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()
    save_to_disk = dist_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(
            log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _, boxes_norm,
                    labels_norm) in enumerate(data_loader, start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration
        # NOTE(review): scheduler is stepped before optimizer.step(); kept
        # as-is to preserve the original LR-schedule timing.
        scheduler.step()

        images = images.to(device)
        targets = targets.to(device)
        # ------------------------------- Mask GT -----------------------------
        # Background channel (0) starts all-ones; each ground-truth box zeroes
        # the background and sets its own class channel inside the box.
        mask_t = np.zeros((images.shape[0], 81, 64, 64))
        mask_t[:, 0, :, :] = np.ones((1, 1, 64, 64))
        for i in range(images.shape[0]):
            for L, B_norm in zip(labels_norm[i], boxes_norm[i]):
                # Boxes are normalized to [0, 1]; scale to the 64x64 mask grid.
                xmin = int(B_norm[0] * 64)
                ymin = int(B_norm[1] * 64)
                xmax = int(B_norm[2] * 64)
                ymax = int(B_norm[3] * 64)
                lab = int(L)

                mask_t[i, 0, ymin:ymax, xmin:xmax] = 0.0
                mask_t[i, lab, ymin:ymax, xmin:xmax] = 1.0

        # torch.autograd.Variable has been a deprecated no-op since
        # PyTorch 0.4; a plain tensor carries autograd state itself.
        mask_t = torch.from_numpy(mask_t.astype(np.float32)).cuda()
        # ---------------------------------------------------------------------
        loss_dict = model(images, targets=(targets, mask_t))
        loss = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % args.log_step == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                meters.delimiter.join([
                    "iter: {iter:06d}",
                    "lr: {lr:.5f}",
                    '{meters}',
                    "eta: {eta}",
                    'mem: {mem}M',
                ]).format(
                    iter=iteration,
                    lr=optimizer.param_groups[0]['lr'],
                    meters=str(meters),
                    eta=eta_string,
                    mem=round(torch.cuda.max_memory_allocated() / 1024.0 /
                              1024.0),
                ))
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss',
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name),
                                              loss_item,
                                              global_step=global_step)
                summary_writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          global_step=global_step)

        if iteration % args.save_step == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            eval_results = do_evaluation(cfg,
                                         model,
                                         distributed=args.distributed,
                                         iteration=iteration)
            if dist_util.get_rank() == 0 and summary_writer:
                for eval_result, dataset in zip(eval_results,
                                                cfg.DATASETS.TEST):
                    write_metric(eval_result['metrics'], 'metrics/' + dataset,
                                 summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #27
0
def do_train(cfg, model, data_loader, optimizer, scheduler, checkpointer,
             device, arguments, args):
    """Standard SSD training loop.

    Iterates over ``data_loader`` starting from ``arguments["iteration"]``,
    performs forward/backward/optimizer/scheduler steps each iteration, logs
    to TensorBoard when enabled, checkpoints every ``args.save_step`` and
    evaluates every ``args.eval_step`` iterations, then saves a final
    checkpoint and returns the model.
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    # Put the model in train() mode so its parameters can be updated.
    model.train()
    save_to_disk = dist_util.get_rank() == 0
    # TensorBoard bookkeeping of the training process (rank 0 only).
    if args.use_tensorboard and save_to_disk:
        import tensorboardX

        summary_writer = tensorboardX.SummaryWriter(
            log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
    else:
        summary_writer = None

    # Number of iterations is the dataloader length (per the config).
    # arguments = {"iteration": 0}: supports resuming from a checkpoint by
    # recording the current iteration count.
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    # Start timing.
    start_training_time = time.time()
    end = time.time()
    # One pass covers the dataloader's length, i.e. the dataset split by
    # batch size. Each batch yields images and their targets: per location,
    # (c+4)k values — k priors, c class scores, plus 4 box coordinates.
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        # print(iteration)
        # print(targets)
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = targets.to(device)
        # Feed inputs and targets to the model; it returns a dict of losses.
        loss_dict = model(images, targets=targets)
        loss = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        # (multi-GPU synchronization; effectively a no-op on one GPU).
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

        # Standard backpropagation: zero grads, backward, step optimizer,
        # then step the per-iteration LR scheduler.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Timing, logging, checkpoint saving and TensorBoard bookkeeping.
        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % args.log_step == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                meters.delimiter.join([
                    "iter: {iter:06d}",
                    "lr: {lr:.5f}",
                    '{meters}',
                    "eta: {eta}",
                    'mem: {mem}M',
                ]).format(
                    iter=iteration,
                    lr=optimizer.param_groups[0]['lr'],
                    meters=str(meters),
                    eta=eta_string,
                    mem=round(torch.cuda.max_memory_allocated() / 1024.0 /
                              1024.0),
                ))
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss',
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name),
                                              loss_item,
                                              global_step=global_step)
                summary_writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          global_step=global_step)

        if iteration % args.save_step == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        # NOTE(review): the original author reported errors inside this
        # evaluation step; the evaluation code itself may need fixing.
        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            eval_results = do_evaluation(cfg,
                                         model,
                                         distributed=args.distributed,
                                         iteration=iteration)
            if dist_util.get_rank() == 0 and summary_writer:
                for eval_result, dataset in zip(eval_results,
                                                cfg.DATASETS.TEST):
                    write_metric(eval_result['metrics'], 'metrics/' + dataset,
                                 summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #28
0
def do_train_with_style(cfg, model, data_loader, style_loader, optimizer,
                        scheduler, checkpointer, device, arguments, args):
    """Training loop with AdaIN style-transfer augmentation.

    Identical to the plain training loop except that, before each forward
    pass, a batch of style images is drawn from ``style_loader`` and — with
    some probability controlled by ``args.p`` — ``apply_style_transfer`` is
    run on the input batch using the pretrained AdaIN VGG encoder/decoder
    weights (paths overridable via the VGG_PATH / DECODER_PATH env vars).
    """
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()
    save_to_disk = dist_util.get_rank() == 0
    if args.use_tensorboard and save_to_disk:
        try:
            from torch.utils.tensorboard import SummaryWriter
        except ImportError:
            from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(
            log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
    else:
        summary_writer = None

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()

    # prepare AdaIN models
    default_path = '/content/drive/MyDrive/DA_detection/models/'
    vgg_path = default_path + 'vgg_normalized.pth'
    if 'VGG_PATH' in os.environ:
        vgg_path = os.environ['VGG_PATH']
    decoder_path = default_path + 'decoder.pth'
    if 'DECODER_PATH' in os.environ:
        decoder_path = os.environ['DECODER_PATH']
    # DEBUG: print('AdaIN > models loaded')

    for iteration, (images, targets, ids) in enumerate(data_loader,
                                                       start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration

        # AdaIN routine
        # NOTE(review): random.seed() re-seeds from system entropy every
        # iteration, and next(iter(style_loader)) builds a fresh iterator
        # each time — with a non-shuffling loader this would always return
        # the first style batch. Presumably intentional; confirm.
        random.seed()
        styles = next(iter(style_loader))
        # DEBUG: print('AdaIN > begin new batch')
        # NOTE(review): style transfer is applied with probability 1 - args.p
        # (the check is `> args.p`) — verify against the intended semantics.
        if random.random() > args.p:
            apply_style_transfer(vgg_path, decoder_path, images, styles[0],
                                 args.p)

        # DEBUG: print('AdaIN > end batch')
        images = images.to(device)
        targets = targets.to(device)
        loss_dict = model(images, targets=targets)
        loss = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(total_loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % args.log_step == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            # NOTE(review): this compares against the *string* "cuda"; a
            # torch.device object would never match — confirm `device`'s type
            # at the call sites (only the GPU-memory log line is affected).
            if device == "cuda":
                logger.info(
                    meters.delimiter.join([
                        "iter: {iter:06d}",
                        "lr: {lr:.5f}",
                        '{meters}',
                        "eta: {eta}",
                        'mem: {mem}M',
                    ]).format(
                        iter=iteration,
                        lr=optimizer.param_groups[0]['lr'],
                        meters=str(meters),
                        eta=eta_string,
                        mem=round(torch.cuda.max_memory_allocated() / 1024.0 /
                                  1024.0),
                    ))
            else:
                logger.info(
                    meters.delimiter.join([
                        "iter: {iter:06d}",
                        "lr: {lr:.5f}",
                        '{meters}',
                        "eta: {eta}",
                    ]).format(
                        iter=iteration,
                        lr=optimizer.param_groups[0]['lr'],
                        meters=str(meters),
                        eta=eta_string,
                    ))
            if summary_writer:
                global_step = iteration
                summary_writer.add_scalar('losses/total_loss',
                                          losses_reduced,
                                          global_step=global_step)
                for loss_name, loss_item in loss_dict_reduced.items():
                    summary_writer.add_scalar('losses/{}'.format(loss_name),
                                              loss_item,
                                              global_step=global_step)
                summary_writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          global_step=global_step)

        if iteration % args.save_step == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
            eval_results = do_evaluation(cfg,
                                         model,
                                         distributed=args.distributed,
                                         iteration=iteration)
            if dist_util.get_rank() == 0 and summary_writer:
                for eval_result, dataset in zip(eval_results,
                                                cfg.DATASETS.TEST):
                    write_metric(eval_result['metrics'], 'metrics/' + dataset,
                                 summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
Beispiel #29
0
def normal_prune(cfg,model,pruned_cfg,file,regular,max,percent,quick,weight_path):
    """Channel-prune the backbone by thresholding BN gamma values.

    Sorts the BN scale (gamma) parameters of all prunable layers, derives a
    pruning threshold from ``percent``, quick-evaluates the pruned model, and
    (when ``quick == 0``) actually rebuilds a compact model, re-initializes it
    from the loose pruned weights, evaluates it, prints before/after
    parameter/size/latency tables, and saves the compact weights.

    NOTE(review): the parameters ``max`` and the local ``eval`` shadow Python
    builtins; left unchanged here to preserve the interface.
    """
    obtain_num_parameters = lambda model: sum([param.nelement() for param in model.parameters()])
    origin_nparameters = obtain_num_parameters(model)
    origin_size=model_size(model)
    CBL_idx, Conv_idx, prune_idx=parse_module_defs(model.backbone.module_defs)

    # Copy the gamma (scale) parameters of every BN layer to be pruned into bn_weights.
    bn_weights = gather_bn_weights(model.backbone.module_list, prune_idx)
    # torch.sort returns (sorted values, indices of the sorted values); keep the values.
    sorted_bn = torch.sort(bn_weights)[0]

    # Highest threshold that avoids pruning *all* channels of some layer
    # (the minimum over layers of each BN layer's max gamma is the upper bound).
    # highest_thre = []
    # for idx in prune_idx:
    #     # .item() extracts the scalar value from the tensor
    #     highest_thre.append(model.backbone.module_list[idx][1].weight.data.abs().max().item())
    # highest_thre = min(highest_thre)
    # Find the percentile corresponding to highest_thre's index.
    # percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights)

    # print(f'Threshold should be less than {highest_thre:.4f}.')
    # print(f'The corresponding prune ratio should less than {percent_limit:.3f}.')      # This limit only prevents normal pruning from emptying a whole layer; if one channel per layer is kept, the limit is unnecessary.

    thre=prune_and_eval(model,sorted_bn,prune_idx,percent,cfg)
    if quick ==0:
        print('实际剪枝---》')
        predictor_channels=list(cfg.MODEL.BACKBONE.OUT_CHANNELS)
        # Get the number of kept filters, the per-layer masks, and the matching head channel counts.
        num_filters, filters_mask,predictor_channels=obtain_filters_mask(model.backbone, thre, CBL_idx, prune_idx,predictor_channels,regular,max)
        # CBLidx2mask maps each BN layer in CBL_idx to its channel mask.
        CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
        pruned_model = prune_model_keep_size(cfg,model, prune_idx, CBL_idx, CBLidx2mask,Conv_idx)
        if max==0:
            with torch.no_grad():
                # NOTE(review): `eval` shadows the builtin; unchanged on purpose.
                eval = do_evaluation(cfg,pruned_model, distributed=False)
            print('after prune_model_keep_size mAP is {}'.format(eval[0]['metrics']))       # Inaccurate for max pruning -- the trailing layers have not been cut off yet.

        # Take the original model's module_defs and rewrite the filter counts.
        compact_module_defs = deepcopy(model.backbone.module_defs)
        prune_after = -1  # CBL index after which all layers are dropped (everything past the prediction layer nearest the first all-zero-bias BN layer); used for max pruning.
        if max==1:
            new_predictor_channels=[]
            for idx in CBL_idx:
                if model.backbone.module_defs[idx]['feature'] == 'linear' or model.backbone.module_defs[idx]['feature'] =='l2_norm':
                    i = int(model.backbone.module_defs[idx]['feature_idx'])
                    if predictor_channels[i] != -1:
                        new_predictor_channels.append(predictor_channels[i])
                        if i + 1 < len(predictor_channels):
                            if predictor_channels[i + 1] == -1:
                                prune_after = idx
                                break
                    if i+1==len(predictor_channels):
                        break
                elif model.backbone.module_defs[idx+1]['type']=='shortcut' and model.backbone.module_defs[idx+1]['feature']=='linear':
                    i = int(model.backbone.module_defs[idx+1]['feature_idx'])
                    new_predictor_channels.append(predictor_channels[i])# The first shortcut feeding the head is never pruned away.
            predictor_channels=new_predictor_channels

        for idx, num in zip(CBL_idx, num_filters):
            assert compact_module_defs[idx]['type'] == 'convolutional'
            if idx==prune_after+1 and prune_after!=-1:
                compact_module_defs[idx]['filters']='-1'# Drop this layer and everything after it.
                break
            else:
                compact_module_defs[idx]['filters'] = str(num)

        write_cfg(pruned_cfg, compact_module_defs)
        print(f'Config file has been saved: {pruned_cfg}')
        cfg.MODEL.BACKBONE.OUT_CHANNELS=tuple(predictor_channels)
        print(f'PRUNED_MODEL.BACKBONE.OUT_CHANNELS:{cfg.MODEL.BACKBONE.OUT_CHANNELS}')
        cfg.MODEL.BACKBONE.CFG=pruned_cfg
        cfg.MODEL.BACKBONE.PRETRAINED=False   # Building the model would load pretrained weights; skip that here since the old weights no longer match the new channel counts.
        compact_model = build_detection_model(cfg)
        # print(compact_model)
        device = torch.device(cfg.MODEL.DEVICE)
        compact_model.to(device)
        init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask,prune_after)
        compact_nparameters = obtain_num_parameters(compact_model)
        compact_size = model_size(compact_model)
        random_input = torch.rand((16, 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)).to(device)
        pruned_forward_time = obtain_avg_forward_time(random_input, pruned_model)
        compact_forward_time = obtain_avg_forward_time(random_input, compact_model)
        # print(compact_model)
        # print(compact_model)
        with torch.no_grad():
            eval = do_evaluation(cfg, compact_model, distributed=False)
        print('Final pruned model mAP is {}'.format(eval[0]['metrics']))
        metric_table = [
            ["Metric", "Before", "After"],
            ["Parameters(M)", f"{origin_nparameters/(1024*1024)}", f"{compact_nparameters/(1024*1024)}"],
            ["模型体积(MB)", f"{origin_size}", f"{compact_size}"],
            ["Inference(ms)", f'{pruned_forward_time*1000/16:.4f}', f'{compact_forward_time*1000/16:.4f}']  #bs=16
        ]
        print(AsciiTable(metric_table).table)
        print(f'压缩率:{(origin_nparameters-compact_nparameters)/origin_nparameters}')
        file.write(f'PRUNED_MODEL.BACKBONE.OUT_CHANNELS:{cfg.MODEL.BACKBONE.OUT_CHANNELS}' + '\n')
        file.write(AsciiTable(metric_table).table + '\n')
        file.write(f'压缩率:{(origin_nparameters-compact_nparameters)/origin_nparameters}' + '\n')
        file.close()

        torch.save(compact_model.state_dict(),weight_path)
        print(f'Compact model has been saved.')