예제 #1
0
def train_net(args):
    cropsize = [cfgs.crop_height, cfgs.crop_width]
    # dataset_train = CityScapes(cfgs.data_dir, cropsize=cropsize, mode='train')
    dataset_train = ContextVoc(cfgs.train_file,cropsize=cropsize, mode='train')
    dataloader_train = DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        drop_last=True
    )
    # dataset_val = CityScapes(cfgs.data_dir,  mode='val')
    dataset_val = ContextVoc(cfgs.val_file, mode='val')
    dataloader_val = DataLoader(
        dataset_val,
        batch_size=1,
        shuffle=True,
        num_workers=args.num_workers,
        drop_last=True
    )
    # build net
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    if torch.cuda.is_available() and args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    net = BiSeNet(cfgs.num_classes, cfgs.netname).to(device)
    # net = BiSeNet(cfgs.num_classes).to(device)
    if args.mulgpu:
        net = torch.nn.DataParallel(net)
    if args.pretrained_model_path is not None:
        print('load model from %s ...' % args.pretrained_model_path)
        load_dict = torch.load(args.pretrained_model_path,map_location=device)
        dict_new = renamedict(net.module.state_dict(),load_dict)
        net.module.load_state_dict(dict_new,strict=False)
        # net.load_state_dict(torch.load(args.pretrained_model_path))
        print('Done!')
    net.train()
    # build optimizer
    if args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(net.parameters(), args.learning_rate)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(), args.learning_rate, momentum=0.9, weight_decay=1e-4)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(net.parameters(), args.learning_rate)
    else:  
        print('not supported optimizer \n')
        optimizer = None
    #build loss
    if args.losstype == 'dice':
        criterion = DiceLoss()
    elif args.losstype == 'crossentropy':
        criterion = torch.nn.CrossEntropyLoss()
    elif args.losstype == 'ohem':
        score_thres = 0.7
        n_min = args.batch_size * cfgs.crop_height * cfgs.crop_width //16
        criterion = OhemCELoss(thresh=score_thres, n_min=n_min)
    elif args.losstype == 'focal':
        criterion = SoftmaxFocalLoss()
    return net,optimizer,criterion,dataloader_train,dataloader_val
예제 #2
0
def train(verbose=True, **kwargs):
    args = kwargs['args']
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(
                backend = 'nccl',
                init_method = 'tcp://127.0.0.1:{}'.format(cfg.port),
                world_size = torch.cuda.device_count(),
                rank = args.local_rank
                )
    setup_logger(cfg.respth)
    logger = logging.getLogger()

    ## dataset
    ds = CityScapes(cfg, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size = cfg.ims_per_gpu,
                    shuffle = False,
                    sampler = sampler,
                    num_workers = cfg.n_workers,
                    pin_memory = True,
                    drop_last = True)

    ## model
    net = Deeplab_v3plus(cfg)
    net.train()
    net.cuda()
    net = nn.parallel.DistributedDataParallel(net,
            device_ids = [args.local_rank, ],
            output_device = args.local_rank
            )
    n_min = cfg.ims_per_gpu*cfg.crop_size[0]*cfg.crop_size[1]//16
    criteria = OhemCELoss(thresh=cfg.ohem_thresh, n_min=n_min).cuda()

    ## optimizer
    optim = Optimizer(
            net,
            cfg.lr_start,
            cfg.momentum,
            cfg.weight_decay,
            cfg.warmup_steps,
            cfg.warmup_start_lr,
            cfg.max_iter,
            cfg.lr_power
            )

    ## train loop
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    n_epoch = 0
    for it in range(cfg.max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0]==cfg.ims_per_gpu: continue
        except StopIteration:
            n_epoch += 1
            sampler.set_epoch(n_epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()

        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        logits = net(im)
        loss = criteria(logits, lb)
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if it%cfg.msg_iter==0 and not it==0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((cfg.max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds = eta))
            msg = ', '.join([
                    'iter: {it}/{max_it}',
                    'lr: {lr:4f}',
                    'loss: {loss:.4f}',
                    'eta: {eta}',
                    'time: {time:.4f}',
                ]).format(
                    it = it,
                    max_it = cfg.max_iter,
                    lr = lr,
                    loss = loss_avg,
                    time = t_intv,
                    eta = eta
                )
            logger.info(msg)
            loss_avg = []
            st = ed

    ## dump the final model and evaluate the result
    if verbose:
        net.cpu()
        save_pth = osp.join(cfg.respth, 'model_final.pth')
        state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
        if dist.get_rank()==0: torch.save(state, save_pth)
        logger.info('training done, model saved to: {}'.format(save_pth))
        logger.info('evaluating the final model')
        net.cuda()
        net.eval()
        evaluator = MscEval(cfg)
        mIOU = evaluator(net)
        logger.info('mIOU is: {}'.format(mIOU))
예제 #3
0
def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:33241',
                            world_size=torch.cuda.device_count(),
                            rank=args.local_rank)
    setup_logger(respth)

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    data_root = '/home/data2/DATASET/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(net,
                                              device_ids=[
                                                  args.local_rank,
                                              ],
                                              output_device=args.local_rank)
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())

        #  print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed
        if dist.get_rank() == 0:
            if (it + 1) % 5000 == 0:
                state = net.module.state_dict() if hasattr(
                    net, 'module') else net.state_dict()
                if dist.get_rank() == 0:
                    torch.save(state, './res/cp/{}_iter.pth'.format(it))
                evaluate(dspth='/home/data2/DATASET/CelebAMask-HQ/test-img',
                         cp='{}_iter.pth'.format(it))

    #  dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
def train():
    setup_logger(respth)
    # dataset
    gpu_number = torch.cuda.device_count()
    n_classes = 19
    n_img_all_gpu = 16 * gpu_number
    cropsize = [448, 448]
    data_root = '/home/data2/DATASET/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(
        ds,
        batch_size=n_img_all_gpu,
        shuffle=True,
    )

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.DataParallel(net)
    net = net.cuda()
    score_thres = 0.7
    n_min = n_img_all_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    # optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)

    # train loop
    msg_iter = 2
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    for it in range(max_iter):
        # try:
        im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        # H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()
        loss_avg.append(loss.item())

        #  print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed

    #  dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
예제 #5
0
파일: train.py 프로젝트: XLEric/FaceParsing
def train(fintune_model, data_root, respth):

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    # sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=True,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    net = BiSeNet(n_classes=n_classes)
    net = net.to(device)

    if os.access(fintune_model, os.F_OK) and (fintune_model
                                              is not None):  # checkpoint
        chkpt = torch.load(fintune_model, map_location=device)
        net.load_state_dict(chkpt)
        print('load fintune model : {}'.format(fintune_model))
    else:
        print('no fintune model')

    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_epoch = 1000

    optim = Optimizer.SGD(net.parameters(),
                          lr=lr_start,
                          momentum=momentum,
                          weight_decay=weight_decay)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    # diter = iter(dl)
    epoch = 0
    flag_change_lr_cnt = 0  # 学习率更新计数器
    init_lr = lr_start  # 学习率

    best_loss = np.inf
    loss_mean = 0.  # 损失均值
    loss_idx = 0.  # 损失计算计数器

    print('start training ~')
    it = 0
    for epoch in range(max_epoch):
        net.train()
        # 学习率更新策略
        if loss_mean != 0.:
            if best_loss > (loss_mean / loss_idx):
                flag_change_lr_cnt = 0
                best_loss = (loss_mean / loss_idx)
            else:
                flag_change_lr_cnt += 1

                if flag_change_lr_cnt > 30:
                    init_lr = init_lr * 0.1
                    set_learning_rate(optimizer, init_lr)
                    flag_change_lr_cnt = 0

        loss_mean = 0.  # 损失均值
        loss_idx = 0.  # 损失计算计数器

        for i, (im, lb) in enumerate(dl):

            im = im.cuda()
            lb = lb.cuda()
            H, W = im.size()[2:]
            lb = torch.squeeze(lb, 1)

            optim.zero_grad()
            out, out16, out32 = net(im)
            lossp = LossP(out, lb)
            loss2 = Loss2(out16, lb)
            loss3 = Loss3(out32, lb)
            loss = lossp + loss2 + loss3

            loss_mean += loss.item()
            loss_idx += 1.

            loss.backward()
            optim.step()

            if it % msg_iter == 0:

                print('epoch <{}/{}> -->> <{}/{}> -> iter {} : loss {:.5f}, loss_mean :{:.5f}, best_loss :{:.5f},lr :{:.6f},batch_size : {}'.\
                format(epoch,max_epoch,i,int(ds.__len__()/n_img_per_gpu),it,loss.item(),loss_mean/loss_idx,best_loss,init_lr,n_img_per_gpu))
                # print(msg)

                if (it) % 500 == 0:
                    state = net.module.state_dict() if hasattr(
                        net, 'module') else net.state_dict()
                    torch.save(state, respth + '/model/face_parse_latest.pth')
                    # evaluate(dspth='./images', cp='{}_iter.pth'.format(it))
            it += 1
        torch.save(state,
                   respth + '/model/face_parse_epoch_{}.pth'.format(epoch))
예제 #6
0
def train(args):
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:34850',
                            world_size=torch.cuda.device_count(),
                            rank=args.local_rank
                            # rank=0
                            )

    dataset = CityScapes(mode='train')
    # dataset = CityScapes_trainval(mode='train')
    # dataset = ADE20K(mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    dataloader = DataLoader(dataset,
                            batch_size=config.imgs_per_gpu,
                            shuffle=False,
                            sampler=sampler,
                            num_workers=4,
                            pin_memory=True,
                            drop_last=True)

    print(dataloader.__len__())
    # net = Origin_Res()
    net = PANet(config.classes)
    # net = HighOrder(config.classes)
    # for i in net.named_modules():
    #     print(i)
    # net = Deeplab_v3plus()

    net.train()
    net.cuda()
    net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net = nn.parallel.DistributedDataParallel(net,
                                              device_ids=[args.local_rank],
                                              output_device=args.local_rank)

    n_min = config.imgs_per_gpu * config.crop_size[0] * config.crop_size[
        1] // 16
    criteria = OhemCELoss(thresh=config.ohem_thresh,
                          n_min=n_min,
                          ignore_lb=config.ignore_label).cuda()

    optimizer = Optimizer(net, config.lr_start, config.momentum,
                          config.weight_decay, config.warmup_steps,
                          config.warmup_start_lr, config.max_iter,
                          config.lr_power)

    total_loss = 0
    n_epoch = 0

    data = iter(dataloader)
    for i in range(config.max_iter):
        start = time.time()
        try:
            image, label, name = next(data)
        except:
            n_epoch += 1
            sampler.set_epoch(n_epoch)
            data = iter(dataloader)
            image, label, name = next(data)

        image = image.cuda()
        label = label.cuda()
        label = torch.squeeze(label, 1)

        # output = net(image)
        output, guidence = net(image)

        # loss = criteria(output, label)
        loss = 0.9 * criteria(output, label) + 0.1 * criteria(guidence, label)
        loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        if (i + 1) % 100 == 0 and dist.get_rank() == 0:
            end = time.time()
            once_time = end - start
            remaining_step = config.max_iter - i
            remaining_time = once_time * remaining_step
            m, s = divmod(remaining_time, 60)
            h, m = divmod(m, 60)
            print('iter: {}, loss: {}, time: {}h:{}m'.format(
                i + 1, total_loss / 100.0, int(h), int(m)))
            total_loss = 0

        if (i + 1) % 100 == 0 and (i + 1) >= (int(config.max_iter) -
                                              200) and dist.get_rank() == 0:
            torch.save(net.state_dict(),
                       './ablation/15x15global{}.pth'.format(i + 1))
예제 #7
0
def train():
    os.environ["CUDA_VISIBLE_DEVICES"] = '6, 7'
    args = parse_args()
    # 1. 初始化horovod
    hvd.init()
    # 2. 给当前进程分配对应的gpu,local_rank()返回的是当前是第几个进程
    torch.cuda.set_device(hvd.local_rank())

    # torch.cuda.set_device(args.local_rank)
    # dist.init_process_group(
    #             backend = 'nccl',
    #             init_method = 'tcp://127.0.0.1:33271',
    #             world_size = 2,
    #             world_size = torch.cuda.device_count(),
    # rank=args.local_rank
    # )
    setup_logger(respth)

    ## dataset
    n_classes = 19
    n_img_per_gpu = 8
    n_workers = 4
    cropsize = [1024, 1024]
    ds = CityScapes('/dataset/cityscapes/leftImg8bit_trainvaltest',
                    cropsize=cropsize,
                    mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(
        ds, num_replicas=hvd.size(), rank=hvd.rank())
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    ## model
    ignore_idx = 255
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    # 5. 初始化的时候广播参数,这个是为了在一开始的时候同步各个gpu之间的参数
    hvd.broadcast_parameters(net.state_dict(), root_rank=0)
    net.train()
    # net = nn.parallel.DistributedDataParallel(net,
    #         device_ids = [args.local_rank, ],
    #         output_device = args.local_rank
    #         )
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    criteria_p = OhemCELoss(thresh=score_thres,
                            n_min=n_min,
                            ignore_lb=ignore_idx)
    criteria_16 = OhemCELoss(thresh=score_thres,
                             n_min=n_min,
                             ignore_lb=ignore_idx)
    criteria_32 = OhemCELoss(thresh=score_thres,
                             n_min=n_min,
                             ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)
    hvd.broadcast_optimizer_state(optim.optim, root_rank=0)
    optim = hvd.DistributedOptimizer(optim.optim,
                                     named_parameters=net.named_parameters())

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu: raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = criteria_p(out, lb)
        loss2 = criteria_16(out16, lb)
        loss3 = criteria_32(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed

    ## dump the final model
    save_pth = osp.join(respth, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    if dist.get_rank() == 0: torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
def train(verbose=True, **kwargs):
    args = kwargs['args']
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:{}'.format(cfg.port),
                            world_size=torch.cuda.device_count(),
                            rank=args.local_rank)
    setup_logger(cfg.respth)
    logger = logging.getLogger()

    ## dataset
    ds = CityScapes(cfg, mode='train', num_copys=2)
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=cfg.ims_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=cfg.n_workers,
                    collate_fn=collate_fn2,
                    pin_memory=True,
                    drop_last=True)

    ## model
    net = Deeplab_v3plus(cfg)
    net.train()
    net.cuda()
    net = nn.parallel.DistributedDataParallel(net,
                                              device_ids=[
                                                  args.local_rank,
                                              ],
                                              output_device=args.local_rank)
    n_min = cfg.ims_per_gpu * cfg.crop_size[0] * cfg.crop_size[1] // 16
    criteria = OhemCELoss(thresh=cfg.ohem_thresh, n_min=n_min).cuda()
    Criterion = pgc_loss(use_pgc=[0, 1, 2], criteria=criteria)
    ## optimizer
    optim = Optimizer(net, cfg.lr_start, cfg.momentum, cfg.weight_decay,
                      cfg.warmup_steps, cfg.warmup_start_lr, cfg.max_iter,
                      cfg.lr_power)
    alpha, beta = cfg.alpha, cfg.beta
    ## train loop
    loss_avg = []
    pgc_avg = []
    ce_avg = []
    ssp_avg = []
    ohem_avg = []

    st = glob_st = time.time()
    diter = iter(dl)
    n_epoch = 0
    for it in range(cfg.max_iter):
        try:
            im, lb, overlap, flip = next(diter)
            if not im.size()[0] != cfg.ims_per_gpu // 2:
                continue
        except StopIteration:
            n_epoch += 1
            sampler.set_epoch(n_epoch)
            diter = iter(dl)
            im, lb, overlap, flip = next(diter)
        im = im.cuda()
        lb = lb.cuda()

        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)
        optim.zero_grad()
        im1, im2 = im[::2], im[1::2]
        lb1, lb2 = lb[::2], lb[1::2]
        logits1 = net(im1)
        logits2 = net(im2)
        # logits = torch.cat([logits1[-1], logits2[-1]], dim=0)

        outputs = []
        for f1, f2 in zip(logits1, logits2):
            outputs.append([f1, f2])
        logits = torch.cat([logits1[-1], logits2[-1]], dim=0)

        mse, sym_ce, mid_mse, mid_ce, mid_l1, ce = Criterion(
            outputs, overlap, flip, lb)
        # loss = criteria(logits, lb)
        loss = beta * sym_ce + ce
        gc_loss = sum(mid_mse)
        loss += alpha * gc_loss
        loss.backward()

        optim.step()

        loss_avg.append(loss.item())
        ohem_avg.append(ce.item())
        pgc_avg.append(gc_loss.item())
        ssp_avg.append(sym_ce.item())
        ## print training log message
        if it % cfg.msg_iter == 0 and not it == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            ohem = sum(ohem_avg) / len(ohem_avg)
            pgc = sum(pgc_avg) / len(pgc_avg)
            ssp = sum(ssp_avg) / len(ssp_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((cfg.max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'iter: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'ohem: {ohem:.4f}',
                'pgc: {pgc:.4f}',
                'ssp: {ssp:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(
                it=it,
                max_it=cfg.max_iter,
                lr=lr,
                loss=loss_avg,
                time=t_intv,
                eta=eta,
                ohem=ohem,
                pgc=pgc,
                ssp=ssp,
            )
            logger.info(msg)
            loss_avg = []
            pgc_avg = []
            ssp_avg = []
            ohem_avg = []
            st = ed

    ## dump the final model and evaluate the result
    if verbose:
        net.cpu()
        save_pth = osp.join(cfg.respth, 'model_final.pth')
        state = net.module.state_dict() if hasattr(
            net, 'module') else net.state_dict()
        if dist.get_rank() == 0: torch.save(state, save_pth)
        logger.info('training done, model saved to: {}'.format(save_pth))
        logger.info('evaluating the final model')
        net.cuda()
        net.eval()
        evaluator = MscEval(cfg)
        mIOU = evaluator(net)
        logger.info('mIOU is: {}'.format(mIOU))
예제 #9
0
def train():
    args = parse_args()
    dist.init_process_group(
                backend='nccl',
                world_size=torch.cuda.device_count()
                )
    local_rank = torch.distributed.get_rank()
    torch.cuda.set_device(local_rank)
    device = torch.device("cuda", local_rank)                
    setup_logger(respth)

    # dataset
    n_classes = 19
    n_img_per_gpu = 8
    n_workers = 4
    cropsize = [1024, 1024]
    ds = CityScapes('../data/cityscapes', cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    sampler=sampler,
                    shuffle=False,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    logger.info('successful load data')

    ignore_idx = 255
    net = AttaNet(n_classes=n_classes)
    if not args.ckpt is None:
        net.load_state_dict(torch.load(args.ckpt, map_location='cpu'))
        logger.info('successful load weights')
    net.cuda(device)
    net.train()
    net = torch.nn.parallel.DistributedDataParallel(net, find_unused_parameters=True,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)
    logger.info('successful distributed')
    score_thres = 0.7
    n_min = cropsize[0]*cropsize[1]//2
    criteria_p = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    criteria_aux1 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    criteria_aux2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    # optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 200000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
            model=net.module,
            lr0=lr_start,
            momentum=momentum,
            wd=weight_decay,
            warmup_steps=warmup_steps,
            warmup_start_lr=warmup_start_lr,
            max_iter=max_iter,
            power=power)

    # train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu: raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = criteria_p(out, lb)
        loss1 = criteria_aux1(out16, lb)
        loss2 = criteria_aux2(out32, lb)
        loss = lossp + loss1 + loss2
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        # print training log message
        if (it+1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                    'it: {it}/{max_it}',
                    'lr: {lr:4f}',
                    'loss: {loss:.4f}',
                    'eta: {eta}',
                    'time: {time:.4f}',
                ]).format(
                    it=it+1,
                    max_it=max_iter,
                    lr=lr,
                    loss=loss_avg,
                    time=t_intv,
                    eta=eta
                )
            logger.info(msg)
            loss_avg = []
            st = ed

    save_pth = osp.join(args.snapshot_dir, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
예제 #10
0
def train():
    args = parse_args()
    cudnn.benchmark = True
    # dataset
    #n_classes = 19
    n_classes = 11
    n_img_per_gpu = 32
    n_workers = 8
    #cropsize = [448, 448]
    cropsize = [224, 224]
    data_root = 'dataset/parsing/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    num_workers=8,
                    shuffle=True,
                    drop_last=True)
    # model
    ignore_idx = -100
    #net = BiSeNet(n_classes=n_classes)
    net = PSP(11, 'resnet50')
    #net.load_state_dict(torch.load('model_best.pth.tar')["state_dict"])

    net = net.cuda()

    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 4
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min,
                       ignore_lb=ignore_idx).cuda()
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min,
                       ignore_lb=ignore_idx).cuda()

    ## optimizer
    #momentum = 0.9
    weight_decay = 1e-4
    lr_start = 2e-3
    args.lr = lr_start
    max_iter = 120000
    train_optimizer = torch.optim.Adam(net.parameters(),
                                       lr=lr_start,
                                       weight_decay=weight_decay)

    ## train loop
    msg_iter = 10
    loss_avg = []
    diter = iter(dl)
    epoch = 0

    net.train()
    for it in range(max_iter):
        adjust_learning_rate(train_optimizer, epoch, it, max_iter, args)
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        train_optimizer.zero_grad()
        out, out16 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss = lossp + loss2
        loss.backward()
        train_optimizer.step()
        loss_avg.append(loss.item())

        #  print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            #lr = train_optimizer.lr
            msg = ', '.join([
                'it: {it}/{max_it}',
                'loss: {loss:.4f}',
            ]).format(
                it=it + 1,
                max_it=max_iter,
                loss=loss_avg,
            )
            print(msg)
            loss_avg = []
        if (it + 1) % 10000 == 0:
            state = net.module.state_dict() if hasattr(
                net, 'module') else net.state_dict()
            torch.save(state, './res/cp/{}_iter.pth'.format(it))

    #  dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
예제 #11
0
def evaluate(respth='./res/test_res',
             dspth='./data',
             cp='model_final_diss.pth'):

    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    save_pth = osp.join(respth, 'cp', cp)
    net.load_state_dict(torch.load(save_pth))
    net.eval()

    no_iter = str(int(cp.split('_')[0]))
    org_respth = respth[:]
    respth = os.path.join(respth, no_iter)

    if not os.path.exists(respth):
        os.makedirs(respth)

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    ''' added '''
    cropsize = [448, 448]
    n_img_per_gpu = 16

    data_root = '/home/jihyun/workspace/face_parsing/dataset/CelebAMask-HQ/'
    ds = FaceMask(data_root, cropsize=cropsize, mode='val')
    dl = DataLoader(ds, batch_size=16, shuffle=False, drop_last=True)

    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    score_thres = 0.7
    ignore_idx = -100

    loss_avg = []
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    with torch.no_grad():

        for i, sample in enumerate(dl):
            im, lb = sample
            im = im.cuda()
            lb = lb.cuda()
            lb = torch.squeeze(lb, 1)
            out, out16, out32 = net(im)
            lossp = LossP(out, lb)
            loss2 = Loss2(out16, lb)
            loss3 = Loss3(out32, lb)
            loss = lossp + loss2 + loss3

            loss_avg.append(loss.item())

        loss_avg = sum(loss_avg) / len(loss_avg)

        f = open(osp.join(org_respth, 'loss.log'), 'a')
        f.write(' eval_loss: ' + str(loss_avg) + '\n')
        f.close()

        for image_path in os.listdir(dspth):
            img = Image.open(osp.join(dspth, image_path))
            image = img.resize((512, 512), Image.BILINEAR)
            img = to_tensor(image)
            img = torch.unsqueeze(img, 0)
            img = img.cuda()
            out, out16, out32 = net(img)

            parsing = out.squeeze(0).cpu().numpy().argmax(0)

            vis_parsing_maps(image,
                             parsing,
                             stride=1,
                             save_im=True,
                             save_path=osp.join(respth, image_path))
예제 #12
0
def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(
                backend = 'nccl',
                init_method = 'tcp://127.0.0.1:33241',
                world_size = torch.cuda.device_count(),
                rank=args.local_rank
                )
    setup_logger(respth)

    ## dataset
    n_classes = 19#19
    n_img_per_gpu = 5
    n_workers = 10#4
    cropsize = [1024, 1024]
    ds = CityScapes('./data/cityscapes', cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size = n_img_per_gpu,
                    shuffle = False,
                    sampler = sampler,
                    num_workers = n_workers,
                    pin_memory = True,
                    drop_last = True)

    ## model
    ignore_idx = 255
    
    device = torch.device("cuda")
    net = ShelfNet(n_classes=n_classes)
    net.load_state_dict(torch.load('./res/model_final_idd.pth'))
    net.to(device)
    
    # net.load_state_dict(checkpoint['model'].module.state_dict())
    # net.cuda()
    net.train()
    # net.cuda()
    # net.train()
    # net = nn.parallel.DistributedDataParallel(net,
    #         device_ids = [args.local_rank, ],
    #         output_device = args.local_rank,
    #         find_unused_parameters=True
    #         )
    score_thres = 0.7
    n_min = n_img_per_gpu*cropsize[0]*cropsize[1]//16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
            model = net,
            lr0 = lr_start,
            momentum = momentum,
            wd = weight_decay,
            warmup_steps = warmup_steps,
            warmup_start_lr = warmup_start_lr,
            max_iter = max_iter,
            power = power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0]==n_img_per_gpu: raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if (it+1)%msg_iter==0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                    'it: {it}/{max_it}',
                    'lr: {lr:4f}',
                    'loss: {loss:.4f}',
                    'eta: {eta}',
                    'time: {time:.4f}',
                ]).format(
                    it = it+1,
                    max_it = max_iter,
                    lr = lr,
                    loss = loss_avg,
                    time = t_intv,
                    eta = eta
                )
            logger.info(msg)
            loss_avg = []
            st = ed

        if it % (1000) == 0:#1000
            ## dump the final model
            save_pth = osp.join(respth, 'shelfnet_model_it_%d.pth'%it)
            #net.cpu()
            #state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
            #if dist.get_rank() == 0: torch.save(state, save_pth)
            torch.save(net.state_dict(),save_pth)

            if it % (1000) == 0 and it > 0:#1000
                evaluate(checkpoint=save_pth)

    ## dump the final model
    save_pth = osp.join(respth, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank()==0: torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
예제 #13
0
def train(opt):
    # saving setting
    opt.saved_path = opt.saved_path + opt.project
    opt.log_path = os.path.join(opt.saved_path, 'tensorboard')
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    # gpu setting
    os.environ["CUDA_VISIBLE_DEVICES"] = '2, 3, 4, 5, 6'
    gpu_number = torch.cuda.device_count()

    # dataset setting
    n_classes = 17
    n_img_all_gpu = opt.batch_size * gpu_number
    cropsize = [448, 448]
    data_root = '/home/data2/DATASET/vschallenge'
    num_workers = opt.num_workers

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(ds,
                    batch_size=n_img_all_gpu,
                    shuffle=True,
                    num_workers=num_workers,
                    drop_last=True)
    ds_eval = FaceMask(data_root, cropsize=cropsize, mode='val')
    dl_eval = DataLoader(ds_eval,
                         batch_size=n_img_all_gpu,
                         shuffle=True,
                         num_workers=num_workers,
                         drop_last=True)

    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0
        try:
            ret = net.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights '
                'with different number of classes. The rest of the weights should be loaded already.'
            )

            print(
                f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
            )
    else:
        last_step = 0
        print('[Info] initializing weights...')

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    net = net.cuda()
    net = nn.DataParallel(net)

    score_thres = 0.7
    n_min = n_img_all_gpu * cropsize[0] * cropsize[1] // opt.batch_size
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    # optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = opt.lr
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim.optim,
                                                           patience=3,
                                                           verbose=True)
    # train loop
    loss_avg = []
    step = max(0, last_step)
    max_iter = len(dl)
    best_epoch = 0
    epoch = 0
    best_loss = 1e5
    net.train()
    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // max_iter
            if epoch < last_epoch:
                continue
            epoch_loss = []
            progress_bar = tqdm(dl)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * max_iter:
                    progress_bar.update()
                    continue
                try:
                    im = data['img']
                    lb = data['label']
                    lb = torch.squeeze(lb, 1)
                    im = im.cuda()
                    lb = lb.cuda()

                    optim.zero_grad()
                    out, out16, out32 = net(im)
                    lossp = LossP(out, lb)
                    loss2 = Loss2(out16, lb)
                    loss3 = Loss3(out32, lb)
                    loss = lossp + loss2 + loss3
                    if loss == 0 or not torch.isfinite(loss):
                        continue
                    loss.backward()
                    optim.step()
                    loss_avg.append(loss.item())
                    #  print training log message
                    # progress_bar.set_description(
                    #     'Epoch: {}/{}. Iteration: {}/{}. p_loss: {:.5f}. 2_loss: {:.5f}. 3_loss: {:.5f}. loss_avg: {:.5f}'.format(
                    #         epoch, opt.num_epochs, iter + 1, max_iter, lossp.item(),
                    #         loss2.item(), loss3.item(), loss.item()))
                    print(
                        'p_loss: {:.5f}. 2_loss: {:.5f}. 3_loss: {:.5f}. loss_avg: {:.5f}'
                        .format(lossp.item(), loss2.item(), loss3.item(),
                                loss.item()))

                    writer.add_scalars('Lossp', {'train': lossp}, step)
                    writer.add_scalars('loss2', {'train': loss2}, step)
                    writer.add_scalars('loss3', {'train': loss3}, step)
                    writer.add_scalars('loss_avg', {'train': loss}, step)

                    # log learning_rate
                    lr = optim.lr
                    writer.add_scalar('learning_rate', lr, step)
                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(net, f'Bisenet_{epoch}_{step}.pth')
                        print('checkpoint...')

                except Exception as e:
                    print('[Erro]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                net.eval()
                loss_p = []
                loss_2 = []
                loss_3 = []
                for iter, data in enumerate(dl_eval):
                    with torch.no_grad():
                        im = data['img']
                        lb = data['label']
                        lb = torch.squeeze(lb, 1)
                        im = im.cuda()
                        lb = lb.cuda()

                        out, out16, out32 = net(im)
                        lossp = LossP(out, lb)
                        loss2 = Loss2(out16, lb)
                        loss3 = Loss3(out32, lb)
                        loss = lossp + loss2 + loss3
                        if loss == 0 or not torch.isfinite(loss):
                            continue
                        loss_p.append(lossp.item())
                        loss_2.append(loss2.item())
                        loss_3.append(loss3.item())
                lossp = np.mean(loss_p)
                loss2 = np.mean(loss_2)
                loss3 = np.mean(loss_3)
                loss = lossp + loss2 + loss3
                print(
                    'Val. Epoch: {}/{}. p_loss: {:1.5f}. 2_loss: {:1.5f}. 3_loss: {:1.5f}. Total_loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, lossp, loss2, loss3, loss))
                writer.add_scalars('Total_loss', {'val': loss}, step)
                writer.add_scalars('p_loss', {'val': lossp}, step)
                writer.add_scalars('2_loss', {'val': loss2}, step)
                writer.add_scalars('3_loss', {'val': loss3}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(net, f'Bisenet_{epoch}_{step}.pth')

                net.train()  # ??
                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(net, f'Bisenet_{epoch}_{step}.pth')
        writer.close()
    writer.close()
예제 #14
0
def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:33271',
                            world_size=torch.cuda.device_count(),
                            rank=args.local_rank)
    setup_logger(respth)

    ## dataset
    n_classes = 19
    n_img_per_gpu = 8
    n_workers = 4
    cropsize = [1024, 1024]
    ds = CityScapes('./data', cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    ## model
    ignore_idx = 255
    net = BiSeNet(n_classes=n_classes)
    net = nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net.cuda()
    net.train()
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    criteria_p = OhemCELoss(thresh=score_thres,
                            n_min=n_min,
                            ignore_lb=ignore_idx)
    criteria_16 = OhemCELoss(thresh=score_thres,
                             n_min=n_min,
                             ignore_lb=ignore_idx)
    criteria_32 = OhemCELoss(thresh=score_thres,
                             n_min=n_min,
                             ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
        #  model = net.module,
        model=net,
        lr0=lr_start,
        momentum=momentum,
        wd=weight_decay,
        warmup_steps=warmup_steps,
        warmup_start_lr=warmup_start_lr,
        max_iter=max_iter,
        power=power)

    ## fp16
    net, opt = amp.initialize(net, optim.optim, opt_level='O1')
    optim.optim = opt

    ## set dist
    net = nn.parallel.DistributedDataParallel(net,
                                              device_ids=[
                                                  args.local_rank,
                                              ],
                                              output_device=args.local_rank)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu: raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = criteria_p(out, lb)
        loss2 = criteria_16(out16, lb)
        loss3 = criteria_32(out32, lb)
        loss = lossp + loss2 + loss3
        #  loss.backward()
        with amp.scale_loss(loss, opt) as scaled_loss:
            scaled_loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed

    ## dump the final model
    save_pth = osp.join(respth, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    if dist.get_rank() == 0: torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))