Code Example #1
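The baseline training entry point: the model is wrapped in nn.DataParallel, the optimizer and MultiStepLR scheduler are built from the config, and the epoch loop interleaves training with periodic snapshots and validation; at the DPG milestone the learning-rate schedule restarts and the dataset is rebuilt with dpg=True. The snippet assumes the surrounding script provides logger, opt, cfg, num_gpu, builder, preset_model, train, validate, validate_gt, and get_func_heatmap_to_coord, plus the usual torch, torch.nn (as nn), and SummaryWriter imports.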
def main():
    logger.info('******************************')
    logger.info(opt)
    logger.info('******************************')
    logger.info(cfg)
    logger.info('******************************')

    # Model Initialize
    m = preset_model(cfg)
    m = nn.DataParallel(m).cuda()

    criterion = builder.build_loss(cfg.LOSS).cuda()

    if cfg.TRAIN.OPTIMIZER == 'adam':
        optimizer = torch.optim.Adam(m.parameters(), lr=cfg.TRAIN.LR)
    elif cfg.TRAIN.OPTIMIZER == 'rmsprop':
        optimizer = torch.optim.RMSprop(m.parameters(), lr=cfg.TRAIN.LR)
    else:
        raise ValueError(f'Unsupported optimizer: {cfg.TRAIN.OPTIMIZER}')

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=cfg.TRAIN.LR_STEP, gamma=cfg.TRAIN.LR_FACTOR)

    writer = SummaryWriter('.tensorboard/{}-{}'.format(opt.exp_id,
                                                       cfg.FILE_NAME))

    train_dataset = builder.build_dataset(cfg.DATASET.TRAIN,
                                          preset_cfg=cfg.DATA_PRESET,
                                          train=True)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE * num_gpu,
        shuffle=True,
        num_workers=opt.nThreads)

    heatmap_to_coord = get_func_heatmap_to_coord(cfg)

    opt.trainIters = 0

    for i in range(cfg.TRAIN.BEGIN_EPOCH, cfg.TRAIN.END_EPOCH):
        opt.epoch = i
        current_lr = optimizer.param_groups[0]['lr']

        logger.info(
            f'############# Starting Epoch {opt.epoch} | LR: {current_lr} #############'
        )

        # Training
        loss, miou = train(opt, train_loader, m, criterion, optimizer, writer)
        logger.epochInfo('Train', opt.epoch, loss, miou)

        lr_scheduler.step()

        if (i + 1) % opt.snapshot == 0:
            # Save checkpoint
            torch.save(
                m.module.state_dict(),
                './exp/{}-{}/model_{}.pth'.format(opt.exp_id, cfg.FILE_NAME,
                                                  opt.epoch))
            # Prediction Test
            with torch.no_grad():
                gt_AP = validate_gt(m.module, opt, cfg, heatmap_to_coord)
                rcnn_AP = validate(m.module, opt, heatmap_to_coord)
                logger.info(
                    f'##### Epoch {opt.epoch} | gt mAP: {gt_AP} | rcnn mAP: {rcnn_AP} #####'
                )

        # DPG milestone: snapshot the model, restart the LR schedule,
        # and rebuild the dataset with dpg=True
        if i == cfg.TRAIN.DPG_MILESTONE:
            torch.save(
                m.module.state_dict(),
                './exp/{}-{}/final.pth'.format(opt.exp_id, cfg.FILE_NAME))
            # Adjust learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = cfg.TRAIN.LR
            lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=cfg.TRAIN.DPG_STEP, gamma=0.1)
            # Reset dataset
            train_dataset = builder.build_dataset(cfg.DATASET.TRAIN,
                                                  preset_cfg=cfg.DATA_PRESET,
                                                  train=True,
                                                  dpg=True)
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=cfg.TRAIN.BATCH_SIZE * num_gpu,
                shuffle=True,
                num_workers=opt.nThreads)

    torch.save(m.module.state_dict(),
               './exp/{}-{}/final_DPG.pth'.format(opt.exp_id, cfg.FILE_NAME))
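
Because the model is wrapped in nn.DataParallel, the snapshots above store m.module.state_dict(), i.e. the weights of the unwrapped model. A minimal loading sketch under that assumption (the checkpoint path is hypothetical; preset_model and cfg are the same helpers the listing relies on):

import torch

m = preset_model(cfg)  # bare model, no DataParallel wrapper
# hypothetical path, matching the './exp/{exp_id}-{file_name}/model_{epoch}.pth' pattern above
state = torch.load('./exp/my_exp-my_cfg/model_10.pth', map_location='cpu')
m.load_state_dict(state)
m.eval()

Loading into the bare model rather than a fresh nn.DataParallel wrapper keeps the state dict free of the 'module.' key prefix, so the same file also works on single-GPU or CPU setups.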
Code Example #2
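A revised version of the same entry point. Compared with Example #1 it adds CPU-aware device placement instead of unconditional .cuda() calls, pathlib-based TensorBoard and experiment directories with an optional opt.clean wipe, mixed-precision training via a GradScaler threaded through train(), per-metric epoch logging, and validation scalars (including a radius MSE) written to TensorBoard. It additionally assumes shutil and torch.cuda.amp.GradScaler are imported and that opt carries device, clean, tensorboard_path, and experiment_path.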
def main():
    logger.info('******************************')
    logger.info(opt)
    logger.info('******************************')
    logger.info(cfg)
    logger.info('******************************')

    # Model Initialize
    m = preset_model(cfg)
    # todo: try to replace with distributedDataParallel to see if it is faster
    m = nn.DataParallel(m)
    if opt.device.type != 'cpu':
        m = m.cuda()

    criterion = builder.build_loss(cfg.LOSS)
    if opt.device.type != 'cpu':
        criterion = criterion.cuda()

    if cfg.TRAIN.OPTIMIZER == 'adam':
        optimizer = torch.optim.Adam(m.parameters(), lr=cfg.TRAIN.LR)
    elif cfg.TRAIN.OPTIMIZER == 'rmsprop':
        optimizer = torch.optim.RMSprop(m.parameters(), lr=cfg.TRAIN.LR)
    else:
        raise ValueError(f'Unsupported optimizer: {cfg.TRAIN.OPTIMIZER}')

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=cfg.TRAIN.LR_STEP, gamma=cfg.TRAIN.LR_FACTOR)

    # Optionally wipe previous runs, then (re)create the output directories
    if opt.clean:
        if opt.tensorboard_path.exists():
            shutil.rmtree(opt.tensorboard_path)
        if opt.experiment_path.exists():
            shutil.rmtree(opt.experiment_path)
    opt.tensorboard_path.mkdir(exist_ok=True, parents=True)
    opt.experiment_path.mkdir(exist_ok=True, parents=True)
    writer = SummaryWriter(str(opt.tensorboard_path))

    train_dataset = builder.build_dataset(cfg.DATASET.TRAIN,
                                          preset_cfg=cfg.DATA_PRESET,
                                          train=True)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE * max(1, num_gpu),
        shuffle=True,
        num_workers=opt.nThreads)

    heatmap_to_coord = get_func_heatmap_to_coord(cfg)

    opt.trainIters = 0

    # Gradient scaler for mixed-precision (AMP) training, passed into train()
    scaler = GradScaler()

    for i in range(cfg.TRAIN.BEGIN_EPOCH, cfg.TRAIN.END_EPOCH):
        opt.epoch = i
        current_lr = optimizer.param_groups[0]['lr']

        logger.info(
            f'############# Starting Epoch {opt.epoch} | LR: {current_lr} #############'
        )

        # Training
        loggers = train(opt, train_loader, m, criterion, optimizer, writer,
                        scaler)
        logger.info(
            f'Train-{opt.epoch:d} epoch | '
            f'{" | ".join(f"{name}:{l.avg:.07f}" for name, l in loggers.items())}'
        )

        lr_scheduler.step()

        if (i + 1) % opt.snapshot == 0:
            # Save checkpoint
            torch.save(m.module.state_dict(),
                       str(opt.experiment_path / f'model_{opt.epoch}.pth'))
            # Prediction Test
            with torch.no_grad():
                metrics_on_true_box = validate_gt(m.module, opt, cfg,
                                                  heatmap_to_coord)
                gt_AP = metrics_on_true_box["map"]
                gt_radius_mse = metrics_on_true_box["radius_mse"]
                rcnn_AP = validate(m.module, opt, heatmap_to_coord)
                logger.info(f'##### Epoch {opt.epoch} | '
                            f'gt mAP: {gt_AP} | '
                            f'rcnn mAP: {rcnn_AP} | '
                            f'gt radius_mse: {gt_radius_mse} #####')

            writer.add_scalar('Validation/mAP_on_gt_box', gt_AP,
                              opt.trainIters)
            writer.add_scalar('Validation/mAP_on_pred_box', rcnn_AP,
                              opt.trainIters)
            writer.add_scalar('Validation/radius_mse_on_gt_box',
                              gt_radius_mse, opt.trainIters)

        # DPG milestone: snapshot the model, restart the LR schedule,
        # and rebuild the dataset with dpg=True
        if i == cfg.TRAIN.DPG_MILESTONE:
            torch.save(m.module.state_dict(),
                       str(opt.experiment_path / "final.pth"))
            # Adjust learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = cfg.TRAIN.LR
            lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=cfg.TRAIN.DPG_STEP, gamma=0.1)
            # Reset dataset
            train_dataset = builder.build_dataset(cfg.DATASET.TRAIN,
                                                  preset_cfg=cfg.DATA_PRESET,
                                                  train=True,
                                                  dpg=True)
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=cfg.TRAIN.BATCH_SIZE * max(1, num_gpu),
                shuffle=True,
                num_workers=opt.nThreads)

    torch.save(m.module.state_dict(),
               str(opt.experiment_path / 'final_DPG.pth'))
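
Example #2 threads a GradScaler into train(), which implies a mixed-precision inner loop. That loop is not shown here; the following is a minimal sketch of one training step under the standard torch.cuda.amp pattern (train_step and its arguments are hypothetical names, not part of the listing):

from torch.cuda.amp import GradScaler, autocast

def train_step(model, criterion, optimizer, scaler, inps, labels):
    optimizer.zero_grad()
    # run the forward pass in float16 where it is numerically safe
    with autocast():
        output = model(inps)
        loss = criterion(output, labels)
    # scale the loss up so small float16 gradients do not underflow to zero
    scaler.scale(loss).backward()
    # step() unscales the gradients and skips the update if any overflowed
    scaler.step(optimizer)
    # adapt the scale factor for the next iteration
    scaler.update()
    return loss.item()

The scaler multiplies the loss before backward() and divides the gradients back down inside step(), which is why train() needs the scaler alongside the optimizer.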