def main():
    """Train the model, logging every iteration and checkpointing every epoch.

    Arguments come from ``parse_args()`` and are pushed into the global
    ``cfg``. The model call returns a mapping of named loss terms; each term
    is reduced with ``.mean()`` and the terms are summed before the backward
    pass.
    """
    # Parse the command line and configure the run.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.mode, args.continue_train)
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, batch in enumerate(trainer.batch_generator):
            inputs, targets, meta_info = batch

            # The batch has been fetched: stop the read clock, start the
            # compute clock.
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # Forward pass: reduce every named loss term to its mean.
            trainer.optimizer.zero_grad()
            raw_losses = trainer.model(inputs, targets, meta_info, 'train')
            loss = {name: raw_losses[name].mean() for name in raw_losses}

            # Backward pass on the sum of all loss terms.
            total_loss = sum(loss.values())
            total_loss.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # Assemble the one-line progress report.
            progress = 'Epoch %d/%d itr %d/%d:' % (
                epoch, cfg.end_epoch, itr, trainer.itr_per_epoch)
            lr_text = 'lr: %g' % (trainer.get_lr())
            speed_text = 'speed: %.2f(%.2fs r%.2f)s/itr' % (
                trainer.tot_timer.average_time,
                trainer.gpu_timer.average_time,
                trainer.read_timer.average_time)
            epoch_text = '%.2fh/epoch' % (
                trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch)
            loss_texts = [
                '%s: %.4f' % ('loss_' + name, value.detach())
                for name, value in loss.items()
            ]
            screen = [progress, lr_text, speed_text, epoch_text] + loss_texts
            trainer.logger.info(' '.join(screen))

            # Close this iteration's total-time measurement and immediately
            # start timing the next iteration and its data read.
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        checkpoint = {
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }
        trainer.save_model(checkpoint, epoch)
Beispiel #2
0
def main():
    """Train the model on (image, k_value, root target) batches.

    Each batch yields an input image, a ``k_value`` and three target
    tensors, which are packed into a dict before being handed to the model.
    The model's output is reduced with ``.mean()`` and used as the loss. A
    checkpoint is saved after every epoch.
    """
    # Parse the command line and configure the run.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, batch in enumerate(trainer.batch_generator):
            input_img, k_value, root_img, root_vis, joints_have_depth = batch

            # The batch has been fetched: stop the read clock, start the
            # compute clock.
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # Forward pass.
            trainer.optimizer.zero_grad()
            target = {
                'coord': root_img,
                'vis': root_vis,
                'have_depth': joints_have_depth,
            }
            loss_coord = trainer.model(input_img, k_value, target).mean()

            # Backward pass; the coordinate loss is the only loss term.
            loss_coord.backward()
            trainer.optimizer.step()

            trainer.gpu_timer.toc()

            # Assemble the one-line progress report.
            progress = 'Epoch %d/%d itr %d/%d:' % (
                epoch, cfg.end_epoch, itr, trainer.itr_per_epoch)
            lr_text = 'lr: %g' % (trainer.get_lr())
            speed_text = 'speed: %.2f(%.2fs r%.2f)s/itr' % (
                trainer.tot_timer.average_time,
                trainer.gpu_timer.average_time,
                trainer.read_timer.average_time)
            epoch_text = '%.2fh/epoch' % (
                trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch)
            loss_text = '%s: %.4f' % ('loss_coord', loss_coord.detach())
            trainer.logger.info(' '.join(
                [progress, lr_text, speed_text, epoch_text, loss_text]))

            # Close this iteration's total-time measurement and immediately
            # start timing the next iteration and its data read.
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        checkpoint = {
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }
        trainer.save_model(checkpoint, epoch)
Beispiel #3
0
def main():
    """Train the proposal/detection model, optionally resuming and checkpointing.

    The model returns two indexable loss containers (proposal and detection).
    The total loss is element 0 of each plus element 1 of each scaled by
    ``cfg.pro_loc_lambda`` / ``cfg.det_loc_lambda``. The scheduler is stepped
    once per epoch, after all batches.
    """
    args = parse_args()

    # Only a single GPU argument is passed on; the original comment notes
    # that distributed learning is not supported.
    cfg.set_args(args.gpu)
    cudnn.benchmark = True

    # Build the trainer and its components.
    trainer = Trainer()
    trainer.build_dataloader()
    trainer.build_model()
    trainer.set_optimizer()
    trainer.set_scheduler()

    # Resume from a checkpoint when requested, otherwise start at epoch 0.
    start_epoch = trainer.load_model() if cfg.load_checkpoint else 0

    logger = Logger()

    for epoch in range(start_epoch, cfg.epoch):
        for i, data in enumerate(trainer.dataloader):
            trainer.optimizer.zero_grad()
            proposal_loss, detection_loss = trainer.model(data)

            # Summed strictly left to right, matching the original
            # expression's association.
            loss = (proposal_loss[0]
                    + proposal_loss[1] * cfg.pro_loc_lambda
                    + detection_loss[0]
                    + detection_loss[1] * cfg.det_loc_lambda)
            loss.backward()
            trainer.optimizer.step()

            # Global step = batches in earlier epochs + index in this epoch.
            global_step = epoch * len(trainer.dataloader) + i
            logger.log(proposal_loss, detection_loss, epoch, i, global_step)

            # When visualization is switched on, raise the flag only on
            # every 500th batch index.
            if cfg.visualize_switch:
                cfg.visualize = (i % 500 == 0)

        trainer.scheduler.step()

        if cfg.save_checkpoint:
            trainer.save_model(epoch)
def main():
    """Train on several datasets at once by merging one batch from each.

    Every iteration pulls one batch from each per-dataset iterator (restarting
    an iterator when it is exhausted), concatenates the batches along
    dimension 0, randomly permutes the merged batch, and runs one optimizer
    step. A checkpoint is saved after every epoch.
    """
    # Parse the command line and configure the run.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr in range(trainer.itr_per_epoch):
            num_sets = len(cfg.trainset)

            # Pull one batch per dataset, restarting exhausted iterators.
            img_parts, coord_parts, vis_parts, depth_parts = [], [], [], []
            for set_idx in range(num_sets):
                try:
                    sample = next(trainer.iterator[set_idx])
                except StopIteration:
                    trainer.iterator[set_idx] = iter(
                        trainer.batch_generator[set_idx])
                    sample = next(trainer.iterator[set_idx])
                input_img, joint_img, joint_vis, joints_have_depth = sample

                img_parts.append(input_img)
                coord_parts.append(joint_img)
                vis_parts.append(joint_vis)
                depth_parts.append(joints_have_depth)

            # Merge the per-dataset batches into one batch.
            input_img = torch.cat(img_parts, dim=0)
            joint_img = torch.cat(coord_parts, dim=0)
            joint_vis = torch.cat(vis_parts, dim=0)
            joints_have_depth = torch.cat(depth_parts, dim=0)

            # Build strided index runs (one per dataset), then shuffle them
            # with a single random permutation of the merged batch size.
            total = input_img.shape[0]
            strided = torch.cat(
                [torch.arange(set_idx, total, num_sets)
                 for set_idx in range(num_sets)],
                dim=0)
            rand_idx = strided[torch.randperm(total)]

            input_img = input_img[rand_idx]
            joint_img = joint_img[rand_idx]
            joint_vis = joint_vis[rand_idx]
            joints_have_depth = joints_have_depth[rand_idx]
            target = {
                'coord': joint_img,
                'vis': joint_vis,
                'have_depth': joints_have_depth,
            }

            # Batch assembly counts as read time; compute time starts here.
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            trainer.optimizer.zero_grad()

            # Forward pass.
            loss_coord = trainer.model(input_img, target).mean()

            # Backward pass; the coordinate loss is the only loss term.
            loss_coord.backward()
            trainer.optimizer.step()

            trainer.gpu_timer.toc()

            # Assemble the one-line progress report.
            progress = 'Epoch %d/%d itr %d/%d:' % (
                epoch, cfg.end_epoch, itr, trainer.itr_per_epoch)
            lr_text = 'lr: %g' % (trainer.get_lr())
            speed_text = 'speed: %.2f(%.2fs r%.2f)s/itr' % (
                trainer.tot_timer.average_time,
                trainer.gpu_timer.average_time,
                trainer.read_timer.average_time)
            epoch_text = '%.2fh/epoch' % (
                trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch)
            loss_text = '%s: %.4f' % ('loss_coord', loss_coord.detach())
            trainer.logger.info(' '.join(
                [progress, lr_text, speed_text, epoch_text, loss_text]))

            # Close this iteration's total-time measurement and immediately
            # start timing the next iteration and its data read.
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        checkpoint = {
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }
        trainer.save_model(checkpoint, epoch)
Beispiel #5
0
def main():
    """Train with an explicit LR scheduler and a separately computed loss.

    Batches are moved to the GPU with ``.cuda()``, the model produces a
    heatmap output, and ``trainer.JointLocationLoss`` turns that output and
    the targets into the loss. The scheduler state is stored in the
    checkpoint next to the network and optimizer state.
    """
    # Parse the command line and configure the run.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

    trainer = Trainer(cfg)
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        # NOTE(review): the scheduler is stepped at the start of the epoch,
        # before any optimizer.step(). Newer PyTorch releases expect the
        # opposite order and log the rate via get_last_lr() -- confirm
        # against the PyTorch version this project targets.
        trainer.scheduler.step()
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, batch in enumerate(trainer.batch_generator):
            input_img, joint_img, joint_vis, joints_have_depth = batch

            # The batch has been fetched: stop the read clock, start the
            # compute clock.
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            trainer.optimizer.zero_grad()

            # Move the batch onto the GPU.
            input_img, joint_img, joint_vis, joints_have_depth = (
                input_img.cuda(),
                joint_img.cuda(),
                joint_vis.cuda(),
                joints_have_depth.cuda(),
            )

            # Forward pass.
            heatmap_out = trainer.model(input_img)

            # Loss and backward pass.
            loc_loss = trainer.JointLocationLoss(
                heatmap_out, joint_img, joint_vis, joints_have_depth)
            loc_loss.backward()
            trainer.optimizer.step()

            trainer.gpu_timer.toc()

            # Assemble the one-line progress report.
            progress = 'Epoch %d/%d itr %d/%d:' % (
                epoch, cfg.end_epoch, itr, trainer.itr_per_epoch)
            lr_text = 'lr: %g' % (trainer.scheduler.get_lr()[0])
            speed_text = 'speed: %.2f(%.2fs r%.2f)s/itr' % (
                trainer.tot_timer.average_time,
                trainer.gpu_timer.average_time,
                trainer.read_timer.average_time)
            epoch_text = '%.2fh/epoch' % (
                trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch)
            loss_text = '%s: %.4f' % ('loss_loc', loc_loss.detach())
            trainer.logger.info(' '.join(
                [progress, lr_text, speed_text, epoch_text, loss_text]))

            # Close this iteration's total-time measurement and immediately
            # start timing the next iteration and its data read.
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        checkpoint = {
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
            'scheduler': trainer.scheduler.state_dict(),
        }
        trainer.save_model(checkpoint, epoch)