Code Example #1
def main(cfg):
    if cfg.SEED_VALUE >= 0:
        print(f'Seed value for the experiment {cfg.SEED_VALUE}')
        os.environ['PYTHONHASHSEED'] = str(cfg.SEED_VALUE)
        random.seed(cfg.SEED_VALUE)
        torch.manual_seed(cfg.SEED_VALUE)
        np.random.seed(cfg.SEED_VALUE)

    logger = create_logger(cfg.LOGDIR, phase='train')
    '''
    logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
    logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')

    logger.info(pprint.pformat(cfg))


    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    writer = SummaryWriter(log_dir=cfg.LOGDIR)
    writer.add_text('config', pprint.pformat(cfg), 0)
    '''
    # ========= Dataloaders ========= #
    data_loaders = get_data_loaders(cfg)

    # ========= Compile Loss ========= #
    loss = VIBELoss(
        e_pose_loss_weight=cfg.LOSS.POSE_W,
        e_shape_loss_weight=cfg.LOSS.SHAPE_W,
    )
    print(loss)
    # ========= Initialize networks, optimizers and lr_schedulers ========= #
    # the temporal generator includes the ResNet50 backbone, a temporal encoder and an SMPL regressor
    # VIBE
    generator = VIBE_Demo(
        batch_size=cfg.TRAIN.BATCH_SIZE,  # mini-batch size for training, e.g. 64
    )
    print(generator)
    a = torch.ones((1, 2048))
    z = generator(a)
    for c in z:
        for key in c.keys():
            print(key)
            print(c[key].shape)

    # define the optimizer for the generator; common choices: sgd, adam
    gen_optimizer = get_optimizer(
        model=generator,  # model to optimize
        optim_type=cfg.TRAIN.GEN_OPTIM,  # optimizer type: sgd or adam
        lr=cfg.TRAIN.GEN_LR,  # learning rate
        weight_decay=cfg.TRAIN.GEN_WD,  # weight-decay (regularization) hyperparameter
        momentum=cfg.TRAIN.GEN_MOMENTUM,  # momentum hyperparameter
    )
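
The `get_optimizer` helper called above is not defined in this snippet. Below is a minimal sketch of what such a helper might look like, assuming it only needs to switch between SGD and Adam based on `optim_type`; the name and signature follow the call site above, but the body is an illustration rather than the project's actual implementation.

import torch

def get_optimizer(model, optim_type, lr, weight_decay, momentum):
    # Hypothetical sketch: build an optimizer from the simple config values above.
    params = [p for p in model.parameters() if p.requires_grad]
    if optim_type.lower() == 'sgd':
        return torch.optim.SGD(params, lr=lr, weight_decay=weight_decay,
                               momentum=momentum)
    elif optim_type.lower() == 'adam':
        # Adam ignores the momentum argument here; it uses its own betas.
        return torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
    raise ValueError('Unsupported optimizer type: {}'.format(optim_type))
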
Code Example #2
def main():

    args = parse_args()

    logger, final_output_dir, tb_log_dir = \
        utils.create_logger(config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.GPUS)

    dataset_type = get_dataset(config)
    train_data = dataset_type(config, is_train=True)
    train_loader = DataLoader(dataset=train_data,
                              batch_size=config.TRAIN.BATCH_SIZE_PER_GPU *
                              len(gpus),
                              shuffle=config.TRAIN.SHUFFLE,
                              num_workers=config.WORKERS,
                              pin_memory=config.PIN_MEMORY)

    val_loader = DataLoader(dataset=dataset_type(config, is_train=False),
                            batch_size=config.TEST.BATCH_SIZE_PER_GPU *
                            len(gpus),
                            shuffle=False,
                            num_workers=config.WORKERS,
                            pin_memory=config.PIN_MEMORY)

    # config.MODEL.NUM_JOINTS = train_data.get_num_points()
    model = models.get_face_alignment_net(config)

    # copy model files
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    model = nn.DataParallel(model, device_ids=gpus).cuda()

    # loss
    criterion = torch.nn.MSELoss(reduction='mean').cuda()  # size_average=True is deprecated; reduction='mean' is equivalent

    optimizer = utils.get_optimizer(config, model)

    best_nme = 100
    last_epoch = config.TRAIN.BEGIN_EPOCH

    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       config.TRAIN.LR_STEP,
                                                       config.TRAIN.LR_FACTOR,
                                                       last_epoch - 1)

    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir, 'latest.pth')
        if os.path.islink(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_nme = checkpoint['best_nme']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found")

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        function.train(config, train_loader, model, criterion, optimizer,
                       epoch, writer_dict)

        # evaluate
        nme, predictions = function.validate(config, val_loader, model,
                                             criterion, epoch, writer_dict)

        is_best = nme < best_nme
        best_nme = min(nme, best_nme)

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        print("best:", is_best)
        utils.save_checkpoint(
            {
                "state_dict": model,
                "epoch": epoch + 1,
                "best_nme": best_nme,
                "optimizer": optimizer.state_dict(),
            }, predictions, is_best, final_output_dir,
            'checkpoint_{}.pth'.format(epoch))

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
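
The resume branch in this example expects `latest.pth` to be a symlink kept up to date by `utils.save_checkpoint`, which is not shown. A minimal sketch of such a helper, assuming it only has to write the checkpoint, repoint the symlink, and keep a copy of the best model; the layout is an assumption based on the call site above, not the project's actual utility.

import os
import torch

def save_checkpoint(states, predictions, is_best, output_dir,
                    filename='checkpoint.pth'):
    # Hypothetical sketch; the real helper also stores `predictions`, omitted here.
    checkpoint_path = os.path.join(output_dir, filename)
    torch.save(states, checkpoint_path)

    # keep 'latest.pth' pointing at the most recent checkpoint
    latest_path = os.path.join(output_dir, 'latest.pth')
    if os.path.islink(latest_path):
        os.remove(latest_path)
    os.symlink(checkpoint_path, latest_path)

    if is_best and 'state_dict' in states:
        torch.save(states['state_dict'],
                   os.path.join(output_dir, 'model_best.pth'))
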
Code Example #3
File: tran_3dwp.py Project: wuyu-create/code
def main(cfg):
    if cfg.SEED_VALUE >= 0:
        print(f'Seed value for the experiment {cfg.SEED_VALUE}')
        os.environ['PYTHONHASHSEED'] = str(cfg.SEED_VALUE)
        random.seed(cfg.SEED_VALUE)
        torch.manual_seed(cfg.SEED_VALUE)
        np.random.seed(cfg.SEED_VALUE)

    logger = create_logger(cfg.LOGDIR, phase='train')

    logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
    logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')

    logger.info(pprint.pformat(cfg))

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    writer = SummaryWriter(log_dir=cfg.LOGDIR)
    writer.add_text('config', pprint.pformat(cfg), 0)

    # ========= Dataloaders ========= #
    data_loaders = get_data_loaders(cfg)


    # ========= Compile Loss ========= #
    loss = VIBELoss(
        e_pose_loss_weight=cfg.LOSS.POSE_W,
        e_shape_loss_weight=cfg.LOSS.SHAPE_W,
    )
    # ========= Initialize networks, optimizers and lr_schedulers ========= #

    # the temporal generator includes the ResNet50 backbone, a temporal encoder and an SMPL regressor
    # VIBE
    generator = VIBE_Demo(
        batch_size=cfg.TRAIN.BATCH_SIZE,            # mini-batch size for training, e.g. 64
    ).to(cfg.DEVICE)


    # define the optimizer for the generator; common choices: sgd, adam
    gen_optimizer = get_optimizer(
        model=generator,                 # model to optimize
        optim_type=cfg.TRAIN.GEN_OPTIM,  # optimizer type: sgd or adam
        lr=cfg.TRAIN.GEN_LR,             # learning rate
        weight_decay=cfg.TRAIN.GEN_WD,   # weight-decay (regularization) hyperparameter
        momentum=cfg.TRAIN.GEN_MOMENTUM, # momentum hyperparameter
    )

    # learning-rate scheduling for the generator
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        gen_optimizer,
        mode='min',
        factor=0.1,
        patience=cfg.TRAIN.LR_PATIENCE,
        verbose=True,
    )

    # ========= Start Training ========= #
    Trainer(
        data_loaders=data_loaders,
        generator=generator,
        criterion=loss,
        gen_optimizer=gen_optimizer,
        start_epoch=cfg.TRAIN.START_EPOCH,
        end_epoch=cfg.TRAIN.END_EPOCH,
        device=cfg.DEVICE,
        writer=writer,
        debug=cfg.DEBUG,
        logdir=cfg.LOGDIR,
        lr_scheduler=lr_scheduler,
        num_iters_per_epoch=cfg.TRAIN.NUM_ITERS_PER_EPOCH,
        debug_freq=cfg.DEBUG_FREQ,
    ).fit()
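
Several of these examples repeat the same seeding block (PYTHONHASHSEED, random, torch, numpy). The calls can be bundled into one helper; this is just a convenience sketch of the block above, with the CUDA seeding line as an extra that the original block does not include.

import os
import random

import numpy as np
import torch

def seed_everything(seed):
    # Seed the common sources of randomness used by these training scripts.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # extra: seed all GPUs as well
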
Code Example #4
def main():
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.'+config.MODEL.NAME+'.get_seg_net')(
        config, is_train=True
    )

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
        'vis_global_steps': 0,
    }

    # dump_input = torch.rand((config.TRAIN.BATCH_SIZE,
    #                          3,
    #                          config.MODEL.IMAGE_SIZE[1],
    #                          config.MODEL.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False)

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    optimizer = get_optimizer(config, model)

    # Data loading code
    if 'xception' in config.MODEL.NAME:
        # Xception uses different mean std for input image
        normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    train_augs = aug.Compose([aug.RandomScale(0.5, 2.0),
                              aug.RandomHorizontallyFlip(0.5),
                              aug.RandomSizedCrop(config.MODEL.IMAGE_SIZE)])

    test_augs = None

    train_dataset = eval('dataset.'+config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TRAIN_SET,
        transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
        augmentations=train_augs
    )
    valid_dataset = eval('dataset.'+config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TEST_SET,
        transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]),
        augmentations=test_augs
    )

    # define loss function (criterion) and optimizer
    criterion = CrossEntropy2D(ignore_index=255, weight=train_dataset.class_weights).cuda()

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE*len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True,
        drop_last=True if len(gpus) > 2 else False  # DataParallel cannot handle an empty batch on any of the GPUs
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True
    )

    if config.TRAIN.LR_SCHEDULER == 'multistep':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR
        )
    elif config.TRAIN.LR_SCHEDULER == 'poly':
        max_iter = config.TRAIN.END_EPOCH * len(train_loader)
        lr_scheduler = PolynomialLR(optimizer, max_iter=max_iter, decay_iter=1)
    elif config.TRAIN.LR_SCHEDULER == 'none':
        lr_scheduler = None
    else:
        raise ValueError('Scheduler {} not supported'.format(config.TRAIN.LR_SCHEDULER))

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        if config.TRAIN.LR_SCHEDULER == 'multistep':
            lr_scheduler.step()

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, lr_scheduler, epoch,
              final_output_dir, tb_log_dir, writer_dict)


        if (epoch + 1) % config.TRAIN.EVAL_INTERVAL == 0:
            if not config.MODEL.LEARN_GAMMA:
                if float(lr_scheduler.last_epoch) / (lr_scheduler.max_iter * config.TRAIN.NE_ITER_RATIO) <= 1:
                    gamma = (config.TRAIN.NE_GAMMA_U - config.TRAIN.NE_GAMMA_L) * \
                            (1 - float(lr_scheduler.last_epoch) / (lr_scheduler.max_iter * config.TRAIN.NE_ITER_RATIO) ) ** \
                            config.TRAIN.NE_GAMMA_EXP + config.TRAIN.NE_GAMMA_L
                else:
                    gamma = config.TRAIN.NE_GAMMA_L
            else:
                gamma = None

            # evaluate on validation set
            perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                      criterion, final_output_dir, tb_log_dir,
                                      writer_dict, gamma=gamma)

            if perf_indicator > best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)
        else:
            perf_indicator = 0.0

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
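
`PolynomialLR` above is a project-specific scheduler (the epoch loop also reads its `max_iter` and `last_epoch` attributes). A rough sketch of a polynomial-decay scheduler with those attributes, assuming a standard power-0.9 decay; the project's real class may differ in details.

from torch.optim.lr_scheduler import _LRScheduler

class PolynomialLR(_LRScheduler):
    # Hypothetical sketch of a polynomial ("poly") learning-rate decay scheduler.

    def __init__(self, optimizer, max_iter, decay_iter=1, gamma=0.9,
                 last_epoch=-1):
        self.max_iter = max_iter
        self.decay_iter = decay_iter
        self.gamma = gamma
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # Decay every `decay_iter` steps and stop decaying past `max_iter`.
        if self.last_epoch % self.decay_iter or self.last_epoch > self.max_iter:
            return [group['lr'] for group in self.optimizer.param_groups]
        factor = (1 - self.last_epoch / float(self.max_iter)) ** self.gamma
        return [base_lr * factor for base_lr in self.base_lrs]
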
Code Example #5
def main():
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = '{}'.format(args.gpu)
    reset_config(config, args)
    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('{}.get_pose_net'.format(args.model))(config, is_train=True)
    model.eval()
    params = count_parameters_in_MB(model)
    logger.info("Params = %.2fMB" % params)
    mult_adds = comp_multadds(model,
                              input_size=(3, config.MODEL.IMAGE_SIZE[1],
                                          config.MODEL.IMAGE_SIZE[0]))
    logger.info("Mult-Adds = %.2fMB" % mult_adds)
    model.train()
    model = model.cuda()

    # copy model file
    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()
    optimizer = get_optimizer(config, model)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TRAIN_SET, True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET)(
        config, config.DATASET.ROOT, config.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_perf = 0.0
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir)
        lr_scheduler.step()
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.state_dict(), final_model_state_file)

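
`count_parameters_in_MB` and `comp_multadds` are utilities from the project and are not shown here. As a reference, a rough sketch of the parameter-count helper, assuming "MB" means millions of parameters as in several NAS codebases; the real implementation may count differently or exclude auxiliary heads.

def count_parameters_in_MB(model):
    # Hypothetical sketch: number of trainable parameters, in millions.
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return n_params / 1e6
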
Code Example #6
def main():

    # load config
    config = parse_arg()

    # create output folder
    output_dict = utils.create_log_folder(config, phase='train')

    # cudnn
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED

    # writer dict
    writer_dict = {
        'writer': SummaryWriter(log_dir=output_dict['tb_dir']),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # construct the CRNN recognition network
    model = crnn.get_crnn(config)
    #
    # checkpoint = torch.load('/data/yolov5/CRNN_Chinese_Characters_Rec/output/OWN/crnn/2020-09-15-22-13/checkpoints/checkpoint_98_acc_1.0983.pth')
    # if 'state_dict' in checkpoint.keys():
    #     model.load_state_dict(checkpoint['state_dict'])
    # else:
    #     model.load_state_dict(checkpoint)
    # get device
    if torch.cuda.is_available():
        device = torch.device("cuda:{}".format(config.GPUID))
    else:
        device = torch.device("cpu:0")

    model = model.to(device)

    # define loss function
    # criterion = torch.nn.CTCLoss()
    criterion = CTCLoss()

    last_epoch = config.TRAIN.BEGIN_EPOCH
    optimizer = utils.get_optimizer(config, model)
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       config.TRAIN.LR_STEP,
                                                       config.TRAIN.LR_FACTOR,
                                                       last_epoch - 1)

    if config.TRAIN.FINETUNE.IS_FINETUNE:
        model_state_file = config.TRAIN.FINETUNE.FINETUNE_CHECKPOINIT
        if model_state_file == '':
            print(" => no checkpoint found")
        checkpoint = torch.load(model_state_file, map_location='cpu')
        if 'state_dict' in checkpoint.keys():
            checkpoint = checkpoint['state_dict']

        from collections import OrderedDict
        model_dict = OrderedDict()
        for k, v in checkpoint.items():
            if 'cnn' in k:
                model_dict[k[4:]] = v
        model.cnn.load_state_dict(model_dict)
        if config.TRAIN.FINETUNE.FREEZE:
            for p in model.cnn.parameters():
                p.requires_grad = False

    elif config.TRAIN.RESUME.IS_RESUME:
        model_state_file = config.TRAIN.RESUME.FILE
        if model_state_file == '':
            print(" => no checkpoint found")
        checkpoint = torch.load(model_state_file, map_location='cpu')
        if 'state_dict' in checkpoint.keys():
            model.load_state_dict(checkpoint['state_dict'])
            last_epoch = checkpoint['epoch']
            # optimizer.load_state_dict(checkpoint['optimizer'])
            # lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        else:
            model.load_state_dict(checkpoint)

    model_info(model)
    train_dataset = get_dataset(config)(config, is_train=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    val_dataset = get_dataset(config)(config, is_train=False)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    best_acc = 0.5
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):

        function.train(config, train_loader, train_dataset, converter, model,
                       criterion, optimizer, device, epoch, writer_dict,
                       output_dict)
        lr_scheduler.step()

        acc = function.validate(config, val_loader, val_dataset, converter,
                                model, criterion, device, epoch, writer_dict,
                                output_dict)

        is_best = acc > best_acc
        best_acc = max(acc, best_acc)

        print("is best:", is_best)
        print("best acc is:", best_acc)
        # save checkpoint
        torch.save(
            {
                "state_dict": model.state_dict(),
                "epoch": epoch + 1,
                # "optimizer": optimizer.state_dict(),
                # "lr_scheduler": lr_scheduler.state_dict(),
                "best_acc": best_acc,
            },
            os.path.join(output_dict['chs_dir'],
                         "checkpoint_{}_acc_{:.4f}.pth".format(epoch, acc)))

    writer_dict['writer'].close()
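
The commented-out line shows that `torch.nn.CTCLoss` could be used in place of the external `CTCLoss`. Here is a small self-contained example of how PyTorch's built-in CTC loss is called on dummy data; the shapes and values are arbitrary and only illustrate the expected layout (time-major log-probabilities, class 0 as the blank).

import torch

# (T, N, C): time steps, batch size, number of classes (class 0 is the blank)
T, N, C = 50, 4, 20
log_probs = torch.randn(T, N, C).log_softmax(2)
targets = torch.randint(1, C, (N, 10), dtype=torch.long)      # label indices, no blanks
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.randint(5, 11, (N,), dtype=torch.long)

ctc = torch.nn.CTCLoss(blank=0, zero_infinity=True)
loss = ctc(log_probs, targets, input_lengths, target_lengths)
print(loss.item())
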
Code Example #7
def main():
    args = parse_args()
    update_config(cfg, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg,
                                                               is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # logger.info(pprint.pformat(model))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, (dump_input))

    logger.info(get_model_summary(model, dump_input))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
        transforms.Compose([transforms.ToTensor(), normalize]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([transforms.ToTensor(), normalize]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    best_perf = 0.0
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        lr_scheduler.step()

        # evaluate on validation set
        perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict)

        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
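
The checkpoint above stores `model.state_dict()` from an `nn.DataParallel` wrapper, so its keys carry a `module.` prefix, while `final_state.pth` is saved from `model.module` without it. A small sketch of loading such a prefixed checkpoint back into a plain, unwrapped model; the helper name is made up for illustration.

import torch

def load_dataparallel_checkpoint(model, checkpoint_path):
    # Load a state dict saved from nn.DataParallel into an unwrapped model.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    state_dict = checkpoint.get('state_dict', checkpoint)
    # Strip the 'module.' prefix added by nn.DataParallel, if present.
    cleaned = {k[len('module.'):] if k.startswith('module.') else k: v
               for k, v in state_dict.items()}
    model.load_state_dict(cleaned)
    return model
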
Code Example #8
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_TR_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print('TOTAL TRAINABLE parameters', pytorch_total_TR_params)
print('TOTAL parameters', pytorch_total_params)

## MY loss
loss = VIBELoss2(
    e_loss_weight=1.0,
    e_3d_loss_weight=50.0,
    e_pose_loss_weight=100.0,
)

gen_optimizer = get_optimizer(
    model=model,
    optim_type='Adam',
    lr=0.0001,
    weight_decay=0.0,
    momentum=0.9,
)

## EXTRA things below

#import torch
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from cv2 import projectPoints
''' GLOBAL VARIABLES '''
angle_idx = 0  # Bone angle to adjust
direction = 0  # Direction to rotate, (0 - x, 1 - y, 2 - z) for upper arm only
step = 3  # 3 degrees for step size
step_radian = step * np.pi / 180
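
To see why the trainable and total counts above can differ, here is a tiny standalone illustration with a toy model (not from the project): freezing a layer removes its parameters from the trainable count but not from the total.

import torch.nn as nn

toy = nn.Sequential(nn.Linear(10, 20), nn.Linear(20, 5))
for p in toy[0].parameters():      # freeze the first layer
    p.requires_grad = False

total = sum(p.numel() for p in toy.parameters())
trainable = sum(p.numel() for p in toy.parameters() if p.requires_grad)
print(total, trainable)  # 325 105
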
Code Example #9
def main():
    final_output_dir = 'output'
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(config, 'train')

    logger.info(pprint.pformat(config))

    # CuDNN
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED

    # HRNet Model
    mv_hrnet = get_pose_net(config, is_train=True)
    #pose_hrnet = get_pose_net(config, is_train=True)  # Pose estimation model
    #pose_hrnet.load_state_dict(torch.load(config.NETWORK.PRETRAINED), strict=False)  # Pretrained weight loading
    #mv_hrnet = get_multiview_pose_net(pose_hrnet, config)  # Multiview adopting
    #depth_hrnet = get_pose_net(config, is_train=True)  # 2.5D depth prediction model

    # Multi GPUs Setting
    gpus = [int(i) for i in config.GPUS.split(',')]
    mv_hrnet = torch.nn.DataParallel(mv_hrnet, device_ids=gpus).cuda()
    logger.info('=> init data parallel model')

    # Loss
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()
    logger.info('=> init criterion')

    # Optimizer
    optimizer = get_optimizer(config, mv_hrnet)
    logger.info('=> init {} optimizer'.format(config.TRAIN.OPTIMIZER))

    # Loading checkpoint
    start_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        start_epoch, mv_hrnet, optimizer = load_checkpoint(
            mv_hrnet, optimizer, final_output_dir)

    # Scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)
    logger.info('=> init scheduler')

    # Summary
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Data loader
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    logger.info('=> loading train dataset')
    train_dataset = H36MDataset(
        config, config.DATASET.TRAIN_SUBSET, True,
        transforms.Compose([transforms.ToTensor(), normalize]))
    #train_dataset = MultiViewH36M(config, config.DATASET.TRAIN_SUBSET, True, transforms.Compose([transforms.ToTensor(), normalize]))
    logger.info('=> loading validation dataset')
    valid_dataset = H36MDataset(
        config, config.DATASET.TEST_SUBSET, False,
        transforms.Compose([transforms.ToTensor(), normalize]))

    logger.info('=> loading train dataloader')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)

    logger.info('=> loading valid dataloader')
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    # Training loop
    best_perf = 0.0
    best_model = False
    for epoch in range(start_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # Trainer
        train(config, train_loader, mv_hrnet, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)

        # Performance indicator
        perf_indicator = validate(config, valid_loader, valid_dataset,
                                  mv_hrnet, criterion, final_output_dir,
                                  tb_log_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': mv_hrnet.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    # End
    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(mv_hrnet.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
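
`load_checkpoint` used in the resume step is another project utility that is not shown. A sketch of what it might do, given the `(start_epoch, model, optimizer)` return signature at the call site and the checkpoint layout saved by `save_checkpoint` further down; the file name and keys here are assumptions.

import os
import torch

def load_checkpoint(model, optimizer, output_dir, filename='checkpoint.pth.tar'):
    # Hypothetical sketch: restore model/optimizer state from the latest checkpoint.
    checkpoint_path = os.path.join(output_dir, filename)
    if not os.path.exists(checkpoint_path):
        print('=> no checkpoint found at {}'.format(checkpoint_path))
        return 0, model, optimizer

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.module.load_state_dict(checkpoint['state_dict'])  # saved from model.module above
    optimizer.load_state_dict(checkpoint['optimizer'])
    print('=> loaded checkpoint (epoch {})'.format(checkpoint['epoch']))
    return checkpoint['epoch'], model, optimizer
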
Code Example #10
def main():
    args = parse_args()
    reset_config(config, args)
    set_cudnn(config)
    seed = config.RANDOM_SEED
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = config.GPU.strip()
    gpus = list(range(len(config.GPU.strip().split(','))))

    logger, final_output_dir, tb_log_dir = create_logger(config, args.cfg)
    summary_writer = SummaryWriter(log_dir=tb_log_dir)

    this_dir = osp.dirname(__file__)
    # backup the source code and the yaml config
    if args.cfg:
        shutil.copy(args.cfg, osp.join(final_output_dir,
                                       osp.basename(args.cfg)))
    if not osp.exists(osp.join(final_output_dir, "lib")):
        shutil.copytree(osp.join(this_dir, "../lib/"),
                        osp.join(final_output_dir, "lib"))
    for k, v in config.items():
        logger.info(f"{k}: {v}")

    # conditional import
    if config.TRAIN.FINETUNE_ROTATER:
        from lib.core.function3 import train, validate, evaluate
    elif config.TRAIN.USE_CYCLE:
        from lib.core.function2 import train, validate, evaluate
    else:
        from lib.core.function1 import train, validate, evaluate

    # build model
    logger.info('start building model.')
    if len(gpus) > 1:
        pose_model = torch.nn.DataParallel(get_pose_model(config)).cuda(
            gpus[0])
        discriminator = torch.nn.DataParallel(get_discriminator(config)).cuda(
            gpus[0])
        temp_discriminator = torch.nn.DataParallel(
            get_discriminator(config)).cuda(gpus[0])
    else:
        pose_model = get_pose_model(config).cuda()
        discriminator = get_discriminator(config, is_temp=False).cuda()
        temp_discriminator = get_discriminator(config, is_temp=True).cuda()
    optimizer_g = get_optimizer(config, pose_model, is_dis=False)
    optimizer_d = get_optimizer(config, discriminator, is_dis=True)
    optimizer_d_temp = get_optimizer(config,
                                     temp_discriminator,
                                     is_dis=True,
                                     is_temp=True)
    step_size, gamma = config.TRAIN.SCHEDULER_STEP_SIZE, config.TRAIN.SCHEDULER_GAMMA
    scheduler_g = lr_scheduler.StepLR(optimizer_g,
                                      step_size=step_size,
                                      gamma=gamma)
    scheduler_d = lr_scheduler.StepLR(optimizer_d,
                                      step_size=step_size,
                                      gamma=gamma)
    scheduler_temp = lr_scheduler.StepLR(optimizer_d_temp,
                                         step_size=step_size,
                                         gamma=gamma)
    logger.info('finished building model.')
    # print out the model arch
    if config.TRAIN.PRETRAIN_LIFTER:
        print("Load pretrained lifter...")
        state_dict = torch.load(
            config.TRAIN.LIFTER_PRETRAIN_PATH)['pose_model_state_dict']
        # state_dict = {k[7:]:v for k, v in state_dict.items()}
        pose_model.load_state_dict(state_dict, strict=False)

    if config.DATA.DATASET_NAME == 'surreal':
        loader_func = surreal
    else:
        loader_func = h36m if config.DATA.DATASET_NAME == "h36m" else mpiinf
    dataset_train = loader_func(config, is_train=True)
    dataset_test = loader_func(config, is_train=False)

    train_loader = DataLoader(dataset=dataset_train,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              drop_last=False,
                              pin_memory=True,
                              num_workers=config.NUM_WORKERS)
    test_loader = DataLoader(dataset=dataset_test,
                             batch_size=config.BATCH_SIZE,
                             shuffle=False,
                             drop_last=False,
                             pin_memory=True,
                             num_workers=config.NUM_WORKERS)

    if args.eval:
        prefix = config.DATA.DATASET_NAME
        # for mode in ['train', 'valid']:
        for mode in ['valid']:
            is_train = True if mode == 'train' else False
            v3d_to_ours = [
                3, 2, 1, 4, 5, 6, 16, 15, 14, 11, 12, 13, 8, 0, 7, 9, 10
            ] if prefix == "h36m" else np.arange(config.DATA.NUM_JOINTS)
            mpi2h36m = [
                10, 9, 8, 11, 12, 13, 4, 3, 2, 5, 6, 7, 1, 14, 15, 16, 0
            ]
            if prefix == 'surreal':
                indices = np.arange(config.DATA.NUM_JOINTS)
            else:
                indices = v3d_to_ours if prefix == "h36m" else mpi2h36m
            mode = "train" if is_train else "valid"
            read_name = f"../data/{prefix}_{mode}_pred3.h5"
            # read_name = f"../../unsupervised_mesh/data/h36m_{mode}_pred_3d_mesh.h5"
            save_name = f"../data/{prefix}_{mode}_pred_3d.h5"
            if args.eval_suffix is not None:
                save_name = save_name[:-3] + "_" + args.eval_suffix + ".h5"

            # eval mode, load the pretrained model and generate the 3d prediction of all 3ds
            if not config.TRAIN.PRETRAIN_LIFTER:
                raise Warning(
                    "You are not using a pretrain model... may be you can specify --pretrain flag"
                )
            dataloader = DataLoader(dataset_train if mode == "train" else dataset_test, batch_size=config.BATCH_SIZE, \
                shuffle=False, drop_last=False, pin_memory=True, num_workers=config.NUM_WORKERS)
            all_out_data = evaluate(dataloader,
                                    pose_model,
                                    config,
                                    is_train=(mode == "train"))
            p1_mpjpe, p2_mpjpe = all_out_data['p1_mpjpe'], all_out_data[
                'p2_mpjpe']
            # read out imagenames
            print("Reading imagenames and joints 2d...")
            fin = h5py.File(read_name, "r")
            fout = h5py.File(save_name, "w")
            imagenames = fin['imagename'][:].copy()
            joints_2d_gt = np.array(fin['joint_2d_gt'])
            fout['imagename'] = imagenames
            fout['joint_2d_gt'] = joints_2d_gt[:, indices]
            fout['joint_3d_gt'] = all_out_data['joint_3d_gt']
            fout['joint_3d_pre'] = all_out_data['joint_3d_pre']
            possible_same_keys = [
                'shape', 'pose', 'original_joint_2d_gt', 'joint_2d_pre',
                'seqlen'
            ]

            for key in possible_same_keys:
                if key in fin.keys():
                    if 'joint' in key:
                        fout[key] = np.array(fin[key])[:, indices]
                    else:
                        fout[key] = np.array(fin[key])
            if 'seqname' in fin.keys():
                fout['seqname'] = fin['seqname'][:].copy()

            if 'auc' in all_out_data.keys():
                fout['auc'] = all_out_data['auc']
                fout['pckh5'] = all_out_data['pckh5']
                fout['auc_p2'] = all_out_data['auc_p2']
                fout['pckh5_p2'] = all_out_data['pckh5_p2']
            if 'scales' in all_out_data.keys():
                fout['scale_pre'] = all_out_data['scales']
            if 'scale_mids' in all_out_data.keys():
                fout['scale_mid_pre'] = all_out_data['scale_mids']

            fin.close()
            fout.close()
            print(
                "Evaluation on the {} set finished. P1 Mpjpe: {:.3f}, P2 Mpjpe: {:.3f}, saved to {}"
                .format("training" if is_train else "test", p1_mpjpe, p2_mpjpe,
                        save_name))
            if prefix == "mpi":
                print("[email protected]: {:.3f}, AUC: {:.3f}".format(
                    all_out_data['pckh5'], all_out_data['auc']))
                print("P2: [email protected]: {:.3f}, AUC: {:.3f}".format(
                    all_out_data['pckh5_p2'], all_out_data['auc_p2']))
        # uncomment this if you need to plot images
        # print("Rendering sequences...")
        # subprocess.call(f'python render.py --seq_num 10 --in_filename ../data/{prefix}_valid_pred_3d.h5 --save_dir ../vis', shell=True)
        return

    # preparation for visualization & perseq optimization(optional)
    if config.USE_GT:
        # note that the gt here is not the gt above(config.USE_GT)
        train_path = f"../data/{config.DATA.DATASET_NAME}_train_scales.pkl"
        valid_path = f"../data/{config.DATA.DATASET_NAME}_valid_scales.pkl"
    else:
        train_path = f"../data/{config.DATA.DATASET_NAME}_train_scales_pre.pkl"
        valid_path = f"../data/{config.DATA.DATASET_NAME}_valid_scales_pre.pkl"

    train_scale_mids_gt = load_pickle(train_path)['scale_mid'] if osp.exists(
        train_path) else None
    valid_scale_mids_gt = load_pickle(valid_path)['scale_mid'] if osp.exists(
        valid_path) else None
    train_seqnames, valid_seqnames = dataset_train.get_seqnames(
    ), dataset_test.get_seqnames()
    best_p1_mpjpe = best_p2_mpjpe = cur_p1_mpjpe = 10000.0
    best_auc_val = best_pckh5 = 0.0
    best_auc_val_p2 = best_pckh5_p2 = 0.0

    for epoch in range(config.TRAIN.NUM_EPOCHS):
        scheduler_d.step()
        scheduler_g.step()
        scheduler_temp.step()
        # scheduler_s.step()
        avg_d_loss, avg_g_loss, avg_t_loss, train_scale_mids_pre = train(
            train_loader,
            pose_model,
            discriminator,
            temp_discriminator,
            optimizer_g,
            optimizer_d,
            optimizer_d_temp,
            epoch,
            config,
            summary_writer=summary_writer,
            print_interval=config.PRINT_INTERVAL)
        logger.info(
            "***** Epoch: {}, Avg G Loss: {:.3f}, Avg D Loss: {:.3f} Avg T Loss: {:.3f} *****"
            .format(epoch, avg_g_loss, avg_d_loss, avg_t_loss))
        p1_mpjpe, p2_mpjpe, vis_image, valid_scale_mids_pre, extra_dict = validate(
            test_loader, pose_model, epoch, config)
        logger.info(
            "Epoch: {}, P1 Mpjpe/Best P1: {:.3f}/{:.3f}, P2 Mpjpe/Best P2/Cur P1: {:.3f}/{:.3f}/{:.3f}"
            .format(epoch, p1_mpjpe, best_p1_mpjpe, p2_mpjpe, best_p2_mpjpe,
                    cur_p1_mpjpe))
        if p2_mpjpe < best_p2_mpjpe:
            best_p2_mpjpe = p2_mpjpe
            cur_p1_mpjpe = p1_mpjpe
            is_best = True
        else:
            is_best = False

        if p1_mpjpe < best_p1_mpjpe:
            best_p1_mpjpe = p1_mpjpe

        if extra_dict is not None:
            auc_val, pckh5 = extra_dict['auc'], extra_dict['pckh5']
            auc_val_p2, pckh5_p2 = extra_dict['auc_p2'], extra_dict['pckh5_p2']
            if auc_val_p2 > best_auc_val_p2:
                best_auc_val_p2 = auc_val_p2
                best_pckh5_p2 = pckh5_p2
                is_best = True
            else:
                is_best = False

            if auc_val > best_auc_val:
                best_auc_val = auc_val
                best_pckh5 = pckh5
            logger.info(
                "[email protected](Best): {:.3f}({:.3f}), AUC value(Best): {:.3f}({:.3f})"
                .format(pckh5, best_pckh5, auc_val, best_auc_val))
            logger.info(
                "P2: [email protected](Best): {:.3f}({:.3f}), AUC value(Best): {:.3f}({:.3f})"
                .format(pckh5_p2, best_pckh5_p2, auc_val_p2, best_auc_val_p2))

        save_checkpoint(
            {
                "epoch": epoch,
                "auc": best_auc_val,
                "pckh5": best_pckh5,
                "auc_p2": best_auc_val_p2,
                "pckh5_p2": best_pckh5_p2,
                "p1_mpjpe": p1_mpjpe,
                "p2_mpjpe": p2_mpjpe,
                "pose_model_state_dict": pose_model.state_dict(),
                "discriminator_state_dict": discriminator.state_dict(),
                "temp_discriminator_state_dict":
                temp_discriminator.state_dict(),
                "optimizer_d": optimizer_d.state_dict(),
                "optimizer_g": optimizer_g.state_dict(),
                "optimizer_d_temp": optimizer_d_temp.state_dict()
            }, is_best, final_output_dir)
        summary_writer.add_scalar("p1_mpjpe_3d_test/epoch", p1_mpjpe, epoch)
        summary_writer.add_scalar("p2_mpjpe_3d_test/epoch", p2_mpjpe, epoch)
        summary_writer.add_image("test_joints/epoch", vis_image, epoch)
        if extra_dict is not None:
            summary_writer.add_scalar("PCKh0.5/epoch", pckh5, epoch)
            summary_writer.add_scalar("AUC/epoch", auc_val, epoch)

        if train_scale_mids_gt is not None and train_scale_mids_pre is not None and len(
                train_scale_mids_pre) > 0:
            num_seq = config.VIS.SCALE_MID_NUM_SEQ
            vis_image_scale_mid1 = plot_scalemid_dist(
                train_scale_mids_pre, train_scale_mids_gt.tolist())
            vis_image_scale_mid1 = torch.from_numpy(vis_image_scale_mid1).type(
                torch.float32).permute(2, 0, 1) / 255
            vis_image_scale_mid2 = plot_scalemid_seq_dist(
                train_scale_mids_pre,
                train_scale_mids_gt.tolist(),
                train_seqnames,
                num_seq=num_seq)
            vis_image_scale_mid2 = torch.from_numpy(vis_image_scale_mid2).type(
                torch.float32).permute(2, 0, 1) / 255
            summary_writer.add_image("train_scalemid_distribution/epoch",
                                     vis_image_scale_mid1, epoch)
            summary_writer.add_image("train_scalemid_seq_distribution/epoch",
                                     vis_image_scale_mid2, epoch)
        if valid_scale_mids_gt is not None and valid_scale_mids_pre is not None and len(
                valid_scale_mids_pre) > 0:
            num_seq = config.VIS.SCALE_MID_NUM_SEQ
            vis_image_scale_mid1 = plot_scalemid_dist(
                valid_scale_mids_pre, valid_scale_mids_gt.tolist())
            vis_image_scale_mid1 = torch.from_numpy(vis_image_scale_mid1).type(
                torch.float32).permute(2, 0, 1) / 255
            vis_image_scale_mid2 = plot_scalemid_seq_dist(
                valid_scale_mids_pre,
                valid_scale_mids_gt.tolist(),
                valid_seqnames,
                num_seq=num_seq)
            vis_image_scale_mid2 = torch.from_numpy(vis_image_scale_mid2).type(
                torch.float32).permute(2, 0, 1) / 255
            summary_writer.add_image("valid_scalemid_distribution/epoch",
                                     vis_image_scale_mid1, epoch)
            summary_writer.add_image("valid_scalemid_seq_distribution/epoch",
                                     vis_image_scale_mid2, epoch)

    summary_writer.close()
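
The conditionally imported `train` functions are not shown, but the optimizers created above (one for the lifter, two for discriminators) indicate a standard alternating adversarial update. A stripped-down sketch of one such iteration with generic BCE losses; this is a generic illustration, not the project's actual training step.

import torch
import torch.nn.functional as F

def adversarial_step(pose_model, discriminator, optimizer_g, optimizer_d,
                     inputs_2d, real_3d):
    # Discriminator update: real 3D poses vs. lifted (fake) ones.
    fake_3d = pose_model(inputs_2d).detach()
    d_real = discriminator(real_3d)
    d_fake = discriminator(fake_3d)
    d_loss = (F.binary_cross_entropy_with_logits(d_real, torch.ones_like(d_real)) +
              F.binary_cross_entropy_with_logits(d_fake, torch.zeros_like(d_fake)))
    optimizer_d.zero_grad()
    d_loss.backward()
    optimizer_d.step()

    # Generator (lifter) update: try to fool the discriminator.
    fake_3d = pose_model(inputs_2d)
    g_adv = discriminator(fake_3d)
    g_loss = F.binary_cross_entropy_with_logits(g_adv, torch.ones_like(g_adv))
    optimizer_g.zero_grad()
    g_loss.backward()
    optimizer_g.step()
    return d_loss.item(), g_loss.item()
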
Code Example #11
def main(cfg):
    if cfg.SEED_VALUE >= 0:
        print(f'Seed value for the experiment {cfg.SEED_VALUE}')
        os.environ['PYTHONHASHSEED'] = str(cfg.SEED_VALUE)
        random.seed(cfg.SEED_VALUE)
        torch.manual_seed(cfg.SEED_VALUE)
        np.random.seed(cfg.SEED_VALUE)

    logger = create_logger(cfg.LOGDIR, phase='train')

    logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
    logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')

    logger.info(pprint.pformat(cfg))

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    writer = SummaryWriter(log_dir=cfg.LOGDIR)
    writer.add_text('config', pprint.pformat(cfg), 0)

    # ========= Dataloaders ========= #
    data_loaders = get_data_loaders(cfg)

    # ========= Compile Loss ========= #
    loss = TCMRLoss(
        e_loss_weight=cfg.LOSS.KP_2D_W,
        e_3d_loss_weight=cfg.LOSS.KP_3D_W,
        e_pose_loss_weight=cfg.LOSS.POSE_W,
        e_shape_loss_weight=cfg.LOSS.SHAPE_W,
        d_motion_loss_weight=cfg.LOSS.D_MOTION_LOSS_W,
    )

    # ========= Initialize networks, optimizers and lr_schedulers ========= #
    generator = TCMR(n_layers=cfg.MODEL.TGRU.NUM_LAYERS,
                     batch_size=cfg.TRAIN.BATCH_SIZE,
                     seqlen=cfg.DATASET.SEQLEN,
                     hidden_size=cfg.MODEL.TGRU.HIDDEN_SIZE,
                     pretrained=cfg.TRAIN.PRETRAINED_REGRESSOR).to(cfg.DEVICE)

    gen_optimizer = get_optimizer(
        model=generator,
        optim_type=cfg.TRAIN.GEN_OPTIM,
        lr=cfg.TRAIN.GEN_LR,
        weight_decay=cfg.TRAIN.GEN_WD,
        momentum=cfg.TRAIN.GEN_MOMENTUM,
    )

    motion_discriminator = MotionDiscriminator(
        rnn_size=cfg.TRAIN.MOT_DISCR.HIDDEN_SIZE,
        input_size=69,
        num_layers=cfg.TRAIN.MOT_DISCR.NUM_LAYERS,
        output_size=1,
        feature_pool=cfg.TRAIN.MOT_DISCR.FEATURE_POOL,
        attention_size=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL != 'attention'
        else cfg.TRAIN.MOT_DISCR.ATT.SIZE,
        attention_layers=None
        if cfg.TRAIN.MOT_DISCR.FEATURE_POOL != 'attention' else
        cfg.TRAIN.MOT_DISCR.ATT.LAYERS,
        attention_dropout=None
        if cfg.TRAIN.MOT_DISCR.FEATURE_POOL != 'attention' else
        cfg.TRAIN.MOT_DISCR.ATT.DROPOUT).to(cfg.DEVICE)

    dis_motion_optimizer = get_optimizer(model=motion_discriminator,
                                         optim_type=cfg.TRAIN.MOT_DISCR.OPTIM,
                                         lr=cfg.TRAIN.MOT_DISCR.LR,
                                         weight_decay=cfg.TRAIN.MOT_DISCR.WD,
                                         momentum=cfg.TRAIN.MOT_DISCR.MOMENTUM)

    motion_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        dis_motion_optimizer,
        mode='min',
        factor=0.1,
        patience=cfg.TRAIN.LR_PATIENCE,
        verbose=True,
    )

    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        gen_optimizer,
        mode='min',
        factor=0.1,
        patience=cfg.TRAIN.LR_PATIENCE,
        verbose=True,
    )

    # ========= Start Training ========= #
    Trainer(
        data_loaders=data_loaders,
        generator=generator,
        motion_discriminator=motion_discriminator,
        criterion=loss,
        dis_motion_optimizer=dis_motion_optimizer,
        dis_motion_update_steps=cfg.TRAIN.MOT_DISCR.UPDATE_STEPS,
        gen_optimizer=gen_optimizer,
        start_epoch=cfg.TRAIN.START_EPOCH,
        end_epoch=cfg.TRAIN.END_EPOCH,
        device=cfg.DEVICE,
        writer=writer,
        debug=cfg.DEBUG,
        logdir=cfg.LOGDIR,
        lr_scheduler=lr_scheduler,
        motion_lr_scheduler=motion_lr_scheduler,
        resume=cfg.TRAIN.RESUME,
        num_iters_per_epoch=cfg.TRAIN.NUM_ITERS_PER_EPOCH,
        debug_freq=cfg.DEBUG_FREQ,
    ).fit()
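
Both schedulers above are `ReduceLROnPlateau`, which lowers the learning rate only when the metric passed to `step()` stops improving, so `step()` has to be called with a monitored value (typically a validation loss). A minimal standalone illustration of that contract, with a toy optimizer and made-up losses:

import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=2)

fake_val_losses = [1.0, 0.9, 0.9, 0.9, 0.9, 0.9]
for epoch, val_loss in enumerate(fake_val_losses):
    optimizer.step()           # stands in for one training epoch
    scheduler.step(val_loss)   # must receive the monitored metric
    print(epoch, optimizer.param_groups[0]['lr'])
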
Code Example #12
def main():

    args = parse_args()

    logger, final_output_dir, tb_log_dir = \
        utils.create_logger(config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn.benchmark = config.CUDNN.BENCHMARK
    # cudnn.determinstic = config.CUDNN.DETERMINISTIC
    # cudnn.enabled = config.CUDNN.ENABLED



    # if isinstance(config.TRAIN.LR_STEP, list):
    #     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #         optimizer, config.TRAIN.LR_STEP,
    #         # config.TRAIN.LR_FACTOR, last_epoch-1
    #         config.TRAIN.LR_FACTOR, 0
    #     )
    # else:
    #     lr_scheduler = torch.optim.lr_scheduler.StepLR(
    #         optimizer, config.TRAIN.LR_STEP,
    #         # config.TRAIN.LR_FACTOR, last_epoch-1
    #     config.TRAIN.LR_FACTOR, 0
    #     )
    dataset_type = get_dataset(config)
    train_dataset = dataset_type(config, is_train=True)

    # train_dataset[0]
    # return 0

    train_loader = DataLoader(
        dataset=dataset_type(config,
                             is_train=True),
        # batch_size=config.TRAIN.BATCH_SIZE_PER_GPU*len(gpus),
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS)



    # val_loader = DataLoader(
    #     dataset=dataset_type(config,
    #                          is_train=True),
    #     # batch_size=config.TEST.BATCH_SIZE_PER_GPU*len(gpus),
    #     batch_size=config.TEST.BATCH_SIZE_PER_GPU,
    #     shuffle=False,
    #     num_workers=config.WORKERS,
    #     # pin_memory=config.PIN_MEMORY
    # )

    model = models.get_face_alignment_net(config)

    # copy model files
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = list(config.GPUS)
    # model = nn.DataParallel(model, device_ids=gpus).cuda()
    model.to("cuda")
    # loss
    criterion = torch.nn.MSELoss(reduction='mean').cuda()  # size_average=True is deprecated; reduction='mean' is equivalent
    # criterion = fnn.mse_loss
    # criterion = WingLoss()
    # criterion = Loss_weighted()

    optimizer = utils.get_optimizer(config, model)
    best_nme = 100
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir,
                                        'latest.pth')

        if os.path.isfile(model_state_file):
            with open(model_state_file, "rb") as fp:
                state_dict = torch.load(fp)
                model.load_state_dict(state_dict)
                last_epoch = 1
            # checkpoint = torch.load(model_state_file)
            # last_epoch = checkpoint['epoch']
            # best_nme = checkpoint['best_nme']
            # model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint (epoch {})"
                  .format(last_epoch))
        else:
            print("=> no checkpoint found")

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        function.train(config, train_loader, model, criterion,
                       optimizer, epoch, writer_dict)

        # evaluate
        nme = 0
        # nme, predictions = function.validate(config, val_loader, model,
        #                                    criterion, epoch, writer_dict)

        is_best = True
        # is_best = nme < best_nme
        best_nme = min(nme, best_nme)


        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        print("best:", is_best)
        torch.save(model.state_dict(), os.path.join(final_output_dir, 'mse_relu_checkpoint_{}.pth'.format(epoch)))

        # utils.save_checkpoint(
        #     {"state_dict": model,
        #      "epoch": epoch + 1,
        #      "best_nme": best_nme,
        #      "optimizer": optimizer.state_dict(),
        #      }, predictions, is_best, final_output_dir, 'checkpoint_{}.pth'.format(epoch))

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth')
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Code Example #13
File: train.py Project: jiangtaoo2333/FFL3-Streamax
def main():

    args = parse_args()

    # set logger and dir
    logger, final_output_dir, tb_log_dir = \
        utils.create_logger(config, args.experiment_name, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # set cudnn
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED

    # currently only a single GPU is supported; TODO: add multi-GPU support
    # set model and loss and criterion
    model = models.get_face_alignment_net(config)
    model = model.cuda(config.GPUS[0])
    criterion = torch.nn.MSELoss(reduction='mean').cuda(config.GPUS[0])  # size_average=True is deprecated; reduction='mean' is equivalent
    # criterion = AdaptiveWingLoss()
    optimizer = utils.get_optimizer(config, model)

    # get dataset
    dataset_type = get_dataset(config)

    # get dataloader
    train_loader = DataLoader(dataset=dataset_type(config, is_train=True),
                              batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
                              shuffle=config.TRAIN.SHUFFLE,
                              num_workers=config.WORKERS,
                              pin_memory=config.PIN_MEMORY)

    val_loader = DataLoader(dataset=dataset_type(config, is_train=False),
                            batch_size=config.TEST.BATCH_SIZE_PER_GPU,
                            shuffle=False,
                            num_workers=config.WORKERS,
                            pin_memory=config.PIN_MEMORY)

    # set lr_scheduler
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       config.TRAIN.LR_STEP,
                                                       config.TRAIN.LR_FACTOR,
                                                       last_epoch - 1)

    # set training writer
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # set training resume function
    best_nme = 10000  # initialised before the resume block so a restored best_nme is not overwritten
    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir, 'latest.pth')
        if os.path.islink(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_nme = checkpoint['best_nme']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found")

    # start training
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):

        lr_scheduler.step()

        # training
        function.train(config, train_loader, model, criterion, optimizer,
                       epoch, writer_dict)

        # evaluating
        nme, predictions = function.validate(config, val_loader, model,
                                             criterion, epoch, writer_dict)

        # saving
        is_best = nme < best_nme
        best_nme = min(nme, best_nme)

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        print("best:", is_best)
        utils.save_checkpoint(
            {
                "state_dict": model,
                "epoch": epoch + 1,
                "best_nme": best_nme,
                "optimizer": optimizer.state_dict(),
            }, predictions, is_best, final_output_dir,
            'checkpoint_{}.pth'.format(epoch))

    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Code example #14
def main():
    best_perf = 0.0

    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir = create_logger(config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = models.pose3d_resnet.get_pose_net(config, is_train=True)

    # copy model file
    this_dir = os.path.dirname(__file__)

    shutil.copy2(args.cfg, final_output_dir)

    gpus = [int(i) for i in config.GPUS.split(',')]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
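    # The loss class is looked up by name from the config (config.LOSS.FN) via eval and
    # instantiated with the joint count and norm type.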
    loss_fn = eval('loss.' + config.LOSS.FN)
    criterion = loss_fn(num_joints=config.MODEL.NUM_JOINTS,
                        norm=config.LOSS.NORM).cuda()

    # define training, validation and evaluation routines
    train = train_integral
    validate = validate_integral
    evaluate = eval_integral

    optimizer = get_optimizer(config, model)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR)

    # Resume from a trained model
    if config.MODEL.RESUME != '':
        checkpoint = torch.load(config.MODEL.RESUME)
        if 'epoch' in checkpoint.keys():
            config.TRAIN.BEGIN_EPOCH = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info('=> resume from pretrained model {}'.format(
                config.MODEL.RESUME))
        else:
            model.load_state_dict(checkpoint)
            logger.info('=> resume from pretrained model {}'.format(
                config.MODEL.RESUME))

    # Choose the dataset, either Human3.6M or mpii
    ds = eval('dataset.' + config.DATASET.DATASET)

    # Data loading code
    train_dataset = ds(cfg=config,
                       root=config.DATASET.ROOT,
                       image_set=config.DATASET.TRAIN_SET,
                       is_train=True)
    valid_dataset = ds(cfg=config,
                       root=config.DATASET.ROOT,
                       image_set=config.DATASET.TEST_SET,
                       is_train=False)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        preds_in_patch_with_score = validate(valid_loader, model)
        acc = evaluate(epoch,
                       preds_in_patch_with_score,
                       valid_loader,
                       final_output_dir,
                       debug=config.DEBUG.DEBUG)

        # The 3D datasets report an error metric (lower is better), so convert it to higher-is-better.
        perf_indicator = 500. - acc if config.DATASET.DATASET in ('h36m', 'mpii_3dhp', 'jta') else acc

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': get_model_name(config),
                'state_dict': model.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
Code example #15
def main():

    # load config
    config = parse_arg()

    # create output folder
    output_dict = utils.create_log_folder(config, phase='train')

    # cudnn
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED

    # writer dict
    writer_dict = {
        'writer': SummaryWriter(log_dir=output_dict['tb_dir']),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # construct face related neural networks
    model = crnn.get_crnn(config)

    # get device
    if torch.cuda.is_available():
        device = torch.device("cuda:{}".format(config.GPUID))
    else:
        device = torch.device("cpu:0")

    model = model.to(device)

    # define loss function
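    # CTC loss allows training the CRNN on unsegmented text lines: no per-character
    # alignment between image positions and label characters is needed.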
    criterion = torch.nn.CTCLoss()

    optimizer = utils.get_optimizer(config, model)

    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME.IS_RESUME:
        model_state_file = config.TRAIN.RESUME.FILE
        if model_state_file == '':
            print(" => no checkpoint found")
        else:
            checkpoint = torch.load(model_state_file, map_location='cpu')
            model.load_state_dict(checkpoint['state_dict'])
            last_epoch = checkpoint['epoch']

    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch-1
        )
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP,
            config.TRAIN.LR_FACTOR, last_epoch - 1
        )

    train_dataset = get_dataset(config)(config, is_train=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    val_dataset = get_dataset(config)(config, is_train=False)
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    best_acc = 0.5
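    # is_best only becomes True once validation accuracy exceeds this 0.5 starting threshold.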
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):

        function.train(config, train_loader, train_dataset, converter, model, criterion, optimizer, device, epoch, writer_dict, output_dict)
        lr_scheduler.step()

        acc = function.validate(config, val_loader, val_dataset, converter, model, criterion, device, epoch, writer_dict, output_dict)

        is_best = acc > best_acc
        best_acc = max(acc, best_acc)

        print("is best:", is_best)
        print("best acc is:", best_acc)
        # save checkpoint
        torch.save(
            {
                "state_dict": model.state_dict(),
                "epoch": epoch + 1,
                "best_acc": best_acc,
            },  os.path.join(output_dict['chs_dir'], "checkpoint_{}_acc_{:.4f}.pth".format(epoch, acc))
        )

    writer_dict['writer'].close()
Code example #16
File: train.py  Project: EXPmaster/YOLOP
def main():
    # set all the configurations
    args = parse_args()
    update_config(cfg, args)

    # set the logger, tb_log_dir means tensorboard logdir
    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(cfg)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # build up the model
    model = get_net(cfg)
    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Data loading
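    # Standard ImageNet channel statistics are used for input normalization.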
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, True, transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, False, transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY)

    # define loss function (criterion) and optimizer
    criterion = get_loss(cfg).cuda()
    optimizer = get_optimizer(cfg, model)

    # load checkpoint model
    best_perf = 0.0
    best_model = False
    last_epoch = -1
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        cfg.TRAIN.LR_STEP,
                                                        cfg.TRAIN.LR_FACTOR,
                                                        last_epoch=last_epoch)
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # training
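    # The loop runs from begin_epoch + 1 through END_EPOCH inclusive, so a resumed run
    # continues from the next epoch.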
    for epoch in range(begin_epoch + 1, cfg.TRAIN.END_EPOCH + 1):
        # train for one epoch
        train(cfg, train_loader, model, criterion, optimizer, epoch,
              writer_dict)

        lr_scheduler.step()

        # evaluate on validation set
        # validate every VAL_FREQ epochs and always on the final epoch
        if epoch % cfg.TRAIN.VAL_FREQ == 0 or epoch == cfg.TRAIN.END_EPOCH:
            perf_indicator = validate(cfg, valid_loader, valid_dataset, model,
                                      criterion, final_output_dir, tb_log_dir,
                                      writer_dict)

            if perf_indicator >= best_perf:
                best_perf = perf_indicator
                best_model = True
            else:
                best_model = False

            # save checkpoint model and best model
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint(
                {
                    'epoch': epoch,
                    'model': cfg.MODEL.NAME,
                    'state_dict': model.state_dict(),
                    'best_state_dict': model.module.state_dict(),
                    'perf': perf_indicator,
                    'optimizer': optimizer.state_dict(),
                }, best_model, final_output_dir)

    # save final model
    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        '=> saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Code example #17
def main(cfg):
    if cfg.SEED_VALUE >= 0:
        print(f'Seed value for the experiment {cfg.SEED_VALUE}')
        os.environ['PYTHONHASHSEED'] = str(cfg.SEED_VALUE)
        random.seed(cfg.SEED_VALUE)
        torch.manual_seed(cfg.SEED_VALUE)
        np.random.seed(cfg.SEED_VALUE)

    logger = create_logger(cfg.LOGDIR, phase='train')

    logger.info(f'GPU name -> {torch.cuda.get_device_name()}')
    logger.info(f'GPU feat -> {torch.cuda.get_device_properties("cuda")}')

    logger.info(pprint.pformat(cfg))

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    writer = SummaryWriter(log_dir=cfg.LOGDIR)
    writer.add_text('config', pprint.pformat(cfg), 0)

    # ========= Dataloaders ========= #
    data_loaders = get_data_loaders(cfg)

    # ========= Compile Loss ========= #
    loss = VIBELoss(
        e_loss_weight=cfg.LOSS.KP_2D_W,
        e_3d_loss_weight=cfg.LOSS.KP_3D_W,
        e_pose_loss_weight=cfg.LOSS.POSE_W,
        e_shape_loss_weight=cfg.LOSS.SHAPE_W,
        d_motion_loss_weight=cfg.LOSS.D_MOTION_LOSS_W,
    )

    # ========= Initialize networks, optimizers and lr_schedulers ========= #
    vibe = VIBE(
        n_layers=cfg.MODEL.TGRU.NUM_LAYERS,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        seqlen=cfg.DATASET.SEQLEN,
        hidden_size=cfg.MODEL.TGRU.HIDDEN_SIZE,
        pretrained=cfg.TRAIN.PRETRAINED_REGRESSOR,
        add_linear=cfg.MODEL.TGRU.ADD_LINEAR,
        bidirectional=cfg.MODEL.TGRU.BIDIRECTIONAL,
        use_residual=cfg.MODEL.TGRU.RESIDUAL,
    ).to(cfg.DEVICE)

    if cfg.TRAIN.PRETRAINED != '' and os.path.isfile(cfg.TRAIN.PRETRAINED):
        checkpoint = torch.load(cfg.TRAIN.PRETRAINED)
        best_performance = checkpoint['performance']
        vibe.load_state_dict(checkpoint['gen_state_dict'])
        print(f'==> Loaded pretrained model from {cfg.TRAIN.PRETRAINED}...')
        print(f'Performance on 3DPW test set {best_performance}')
    else:
        print(f'{cfg.TRAIN.PRETRAINED} is not a pretrained model!!!!')

    generator = REFINER(vibe=vibe).to(cfg.DEVICE)

    gen_optimizer = get_optimizer(
        model=generator,
        optim_type=cfg.TRAIN.GEN_OPTIM,
        lr=cfg.TRAIN.GEN_LR,
        weight_decay=cfg.TRAIN.GEN_WD,
        momentum=cfg.TRAIN.GEN_MOMENTUM,
    )

    # motion_discriminator = MotionDiscriminator(
    #     rnn_size=cfg.TRAIN.MOT_DISCR.HIDDEN_SIZE,
    #     input_size=69,
    #     num_layers=cfg.TRAIN.MOT_DISCR.NUM_LAYERS,
    #     output_size=1,
    #     feature_pool=cfg.TRAIN.MOT_DISCR.FEATURE_POOL,
    #     attention_size=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.SIZE,
    #     attention_layers=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.LAYERS,
    #     attention_dropout=None if cfg.TRAIN.MOT_DISCR.FEATURE_POOL !='attention' else cfg.TRAIN.MOT_DISCR.ATT.DROPOUT
    # ).to(cfg.DEVICE)

    # dis_motion_optimizer = get_optimizer(
    #     model=motion_discriminator,
    #     optim_type=cfg.TRAIN.MOT_DISCR.OPTIM,
    #     lr=cfg.TRAIN.MOT_DISCR.LR,
    #     weight_decay=cfg.TRAIN.MOT_DISCR.WD,
    #     momentum=cfg.TRAIN.MOT_DISCR.MOMENTUM
    # )

    # motion_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #     dis_motion_optimizer,
    #     mode='min',
    #     factor=0.1,
    #     patience=cfg.TRAIN.LR_PATIENCE,
    #     verbose=True,
    # )

    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        gen_optimizer,
        mode='min',
        factor=0.1,
        patience=cfg.TRAIN.LR_PATIENCE,
        verbose=True,
    )

    # ========= Start Training ========= #
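    # The adversarial motion discriminator is disabled in this variant (set to None below);
    # the REFINER generator is trained with the VIBE losses only, and the commented-out
    # blocks above show the full adversarial setup.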
    motion_discriminator = None
    dis_motion_optimizer = None
    motion_lr_scheduler = None
    Trainer(
        data_loaders=data_loaders,
        generator=generator,
        motion_discriminator=motion_discriminator,
        criterion=loss,
        dis_motion_optimizer=dis_motion_optimizer,
        dis_motion_update_steps=cfg.TRAIN.MOT_DISCR.UPDATE_STEPS,
        gen_optimizer=gen_optimizer,
        start_epoch=cfg.TRAIN.START_EPOCH,
        end_epoch=cfg.TRAIN.END_EPOCH,
        device=cfg.DEVICE,
        writer=writer,
        debug=cfg.DEBUG,
        logdir=cfg.LOGDIR,
        lr_scheduler=lr_scheduler,
        motion_lr_scheduler=motion_lr_scheduler,
        resume=cfg.TRAIN.RESUME,
        num_iters_per_epoch=cfg.TRAIN.NUM_ITERS_PER_EPOCH,
        debug_freq=cfg.DEBUG_FREQ,
    ).fit()
Code example #18
def main():

    args = parse_args()

    logger, final_output_dir, tb_log_dir = \
        utils.create_logger(config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.GPUS)

    dataset_type = get_dataset(config)
    train_data = dataset_type(config, split="train")
    train_loader = DataLoader(dataset=train_data,
                              batch_size=config.TRAIN.BATCH_SIZE_PER_GPU *
                              len(gpus),
                              shuffle=config.TRAIN.SHUFFLE,
                              num_workers=config.WORKERS,
                              pin_memory=config.PIN_MEMORY)

    val_data = dataset_type(config, split="valid")
    val_loader = DataLoader(dataset=val_data,
                            batch_size=config.TEST.BATCH_SIZE_PER_GPU *
                            len(gpus),
                            shuffle=False,
                            num_workers=config.WORKERS,
                            pin_memory=config.PIN_MEMORY)

    # config.MODEL.NUM_JOINTS = train_data.get_num_points()
    model = models.get_face_alignment_net(config)

    # copy model files
    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    model = nn.DataParallel(model, device_ids=gpus).cuda()

    # loss
    criterion = torch.nn.MSELoss(reduction='mean').cuda()
    optimizer = utils.get_optimizer(config, model)

    best_nme = 100
    last_epoch = config.TRAIN.BEGIN_EPOCH

    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       config.TRAIN.LR_STEP,
                                                       config.TRAIN.LR_FACTOR,
                                                       last_epoch - 1)

    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir, 'final.pth')
        if os.path.islink(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_nme = checkpoint['best_nme']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found")
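    # Per-epoch training losses are accumulated in this list and written to loss2.csv
    # after the last epoch.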
    loss = []

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        losses, diff = function.train(config, train_loader, model, criterion,
                                      optimizer, epoch, writer_dict)
        loss.append(losses)
        lr_scheduler.step()

        np.save(
            os.path.join(final_output_dir, "train_diff@epoch{}".format(epoch)),
            diff)

        # evaluate
        nme, predictions, diff = function.validate(config, val_loader, model,
                                                   criterion, epoch,
                                                   writer_dict)

        np.save(
            os.path.join(final_output_dir, "valid_diff@epoch{}".format(epoch)),
            diff)

        is_best = nme < best_nme
        best_nme = min(nme, best_nme)

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        print("best:", is_best)
        utils.save_checkpoint(
            {
                "state_dict": model,
                "epoch": epoch + 1,
                "best_nme": best_nme,
                "optimizer": optimizer.state_dict(),
            }, predictions, is_best, final_output_dir,
            'checkpoint_{}.pth'.format(epoch))
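        # When a new best NME is reached, the predicted landmarks are written next to each
        # validation annotation file, rescaled by the dataset's resize factors.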
        if is_best:
            for i in range(len(predictions)):
                afile = val_data.annotation_files[i]
                new_afile = '{}.{}.txt'.format(
                    afile,
                    os.path.basename(args.cfg).split('.')[0])
                with open(new_afile, 'wt') as f:
                    pts = predictions[i].cpu().numpy()
                    for j in range(len(pts)):
                        f.write("{},{}\n".format(
                            pts[j][1] / val_data.factor[1],
                            pts[j][0] / val_data.factor[0]))

    pd.DataFrame(data=loss).to_csv('loss2.csv')
    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()