Example #1
def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:33241',
                            world_size=torch.cuda.device_count(),
                            rank=args.local_rank)
    setup_logger(respth)

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    data_root = '/home/data2/DATASET/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(net,
                                              device_ids=[
                                                  args.local_rank,
                                              ],
                                              output_device=args.local_rank)
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())

        #  print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:.4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed
        if (it + 1) % 5000 == 0 and dist.get_rank() == 0:
            state = net.module.state_dict() if hasattr(
                net, 'module') else net.state_dict()
            torch.save(state, './res/cp/{}_iter.pth'.format(it))
            evaluate(dspth='/home/data2/DATASET/CelebAMask-HQ/test-img',
                     cp='{}_iter.pth'.format(it))

    #  dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
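Example #1 assumes a `parse_args` helper that supplies the `--local_rank` argument which `python -m torch.distributed.launch` passes to every worker process; the helper is not shown in the source, so the following is a minimal, hypothetical sketch:

import argparse

def parse_args():
    # --local_rank is injected by the torch.distributed.launch utility
    parser = argparse.ArgumentParser()
    parser.add_argument('--local_rank',
                        type=int,
                        default=0,
                        help='per-node process rank, set by the launcher')
    return parser.parse_args()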
Example #2
def train(fintune_model, data_root, respth):

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    # sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=True,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    net = BiSeNet(n_classes=n_classes)
    net = net.to(device)

    if fintune_model is not None and os.access(fintune_model,
                                               os.F_OK):  # checkpoint
        chkpt = torch.load(fintune_model, map_location=device)
        net.load_state_dict(chkpt)
        print('loaded finetune model: {}'.format(fintune_model))
    else:
        print('no finetune model')

    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_epoch = 1000

    optim = torch.optim.SGD(net.parameters(),
                            lr=lr_start,
                            momentum=momentum,
                            weight_decay=weight_decay)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    # diter = iter(dl)
    epoch = 0
    flag_change_lr_cnt = 0  # counts epochs without loss improvement
    init_lr = lr_start  # current learning rate

    best_loss = np.inf
    loss_mean = 0.  # running sum of the loss
    loss_idx = 0.  # number of loss samples accumulated

    print('start training')
    it = 0
    for epoch in range(max_epoch):
        net.train()
        # learning-rate schedule: decay by 10x when the mean loss stops improving
        if loss_mean != 0.:
            if best_loss > (loss_mean / loss_idx):
                flag_change_lr_cnt = 0
                best_loss = (loss_mean / loss_idx)
            else:
                flag_change_lr_cnt += 1

                if flag_change_lr_cnt > 30:
                    init_lr = init_lr * 0.1
                    set_learning_rate(optim, init_lr)
                    flag_change_lr_cnt = 0

        loss_mean = 0.  # reset the running loss sum
        loss_idx = 0.  # reset the loss counter

        for i, (im, lb) in enumerate(dl):

            im = im.to(device)
            lb = lb.to(device)
            H, W = im.size()[2:]
            lb = torch.squeeze(lb, 1)

            optim.zero_grad()
            out, out16, out32 = net(im)
            lossp = LossP(out, lb)
            loss2 = Loss2(out16, lb)
            loss3 = Loss3(out32, lb)
            loss = lossp + loss2 + loss3

            loss_mean += loss.item()
            loss_idx += 1.

            loss.backward()
            optim.step()

            if it % msg_iter == 0:

                print('epoch <{}/{}> batch <{}/{}> iter {}: '
                      'loss {:.5f}, loss_mean {:.5f}, best_loss {:.5f}, '
                      'lr {:.6f}, batch_size {}'.format(
                          epoch, max_epoch, i, len(ds) // n_img_per_gpu, it,
                          loss.item(), loss_mean / loss_idx, best_loss,
                          init_lr, n_img_per_gpu))

                if it % 500 == 0:
                    state = net.module.state_dict() if hasattr(
                        net, 'module') else net.state_dict()
                    torch.save(state, respth + '/model/face_parse_latest.pth')
                    # evaluate(dspth='./images', cp='{}_iter.pth'.format(it))
            it += 1
        state = net.module.state_dict() if hasattr(
            net, 'module') else net.state_dict()
        torch.save(state,
                   respth + '/model/face_parse_epoch_{}.pth'.format(epoch))
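Example #2 calls a `set_learning_rate` helper that is not defined above; judging from the call site, it presumably just rewrites the learning rate of every parameter group. A minimal sketch under that assumption:

def set_learning_rate(optimizer, lr):
    # apply the same learning rate to every param group of a torch.optim optimizer
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr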
Example #3
def train():
    setup_logger(respth)
    # dataset
    gpu_number = torch.cuda.device_count()
    n_classes = 19
    n_img_all_gpu = 16 * gpu_number
    cropsize = [448, 448]
    data_root = '/home/data2/DATASET/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(
        ds,
        batch_size=n_img_all_gpu,
        shuffle=True,
    )

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.DataParallel(net)
    net = net.cuda()
    score_thres = 0.7
    n_min = n_img_all_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    # optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net.module,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)

    # train loop
    msg_iter = 2
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    for it in range(max_iter):
        try:
            im, lb = next(diter)
        except StopIteration:
            # restart the iterator once the dataloader is exhausted
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()
        loss_avg.append(loss.item())

        #  print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:.4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed

    #  dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
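All four examples construct three `OhemCELoss` criteria, one per output scale of BiSeNet. The class itself is not shown in the source; the idea of online hard example mining is to backpropagate only through the hardest pixels: every pixel whose per-pixel cross-entropy exceeds -log(thresh), and at least the `n_min` hardest ones. A minimal sketch under those assumptions:

import torch
import torch.nn as nn

class OhemCELoss(nn.Module):
    # hypothetical reconstruction: keep at least the n_min hardest pixels,
    # plus any pixel whose true-class probability falls below thresh
    # (i.e. whose loss exceeds -log(thresh))
    def __init__(self, thresh, n_min, ignore_lb=255):
        super().__init__()
        self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float))
        self.n_min = n_min
        self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb,
                                            reduction='none')

    def forward(self, logits, labels):
        loss = self.criteria(logits, labels).view(-1)
        loss, _ = torch.sort(loss, descending=True)
        if loss[self.n_min] > self.thresh:
            loss = loss[loss > self.thresh]  # many hard pixels: keep them all
        else:
            loss = loss[:self.n_min]  # otherwise keep the n_min hardest
        return torch.mean(loss)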
Example #4
def train():
    os.environ["CUDA_VISIBLE_DEVICES"] = '6,7'
    args = parse_args()
    # 1. initialize Horovod
    hvd.init()
    # 2. pin this process to its GPU; hvd.local_rank() is the index of this process on the node
    torch.cuda.set_device(hvd.local_rank())

    # replaced by Horovod; the original DDP initialization was:
    # torch.cuda.set_device(args.local_rank)
    # dist.init_process_group(backend='nccl',
    #                         init_method='tcp://127.0.0.1:33271',
    #                         world_size=torch.cuda.device_count(),
    #                         rank=args.local_rank)
    setup_logger(respth)

    ## dataset
    n_classes = 19
    n_img_per_gpu = 8
    n_workers = 4
    cropsize = [1024, 1024]
    ds = CityScapes('/dataset/cityscapes/leftImg8bit_trainvaltest',
                    cropsize=cropsize,
                    mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(
        ds, num_replicas=hvd.size(), rank=hvd.rank())
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    ## model
    ignore_idx = 255
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    # 5. broadcast the initial parameters from rank 0 so every GPU starts from the same weights
    hvd.broadcast_parameters(net.state_dict(), root_rank=0)
    net.train()
    # net = nn.parallel.DistributedDataParallel(net,
    #         device_ids = [args.local_rank, ],
    #         output_device = args.local_rank
    #         )
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    criteria_p = OhemCELoss(thresh=score_thres,
                            n_min=n_min,
                            ignore_lb=ignore_idx)
    criteria_16 = OhemCELoss(thresh=score_thres,
                             n_min=n_min,
                             ignore_lb=ignore_idx)
    criteria_32 = OhemCELoss(thresh=score_thres,
                             n_min=n_min,
                             ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(model=net,
                      lr0=lr_start,
                      momentum=momentum,
                      wd=weight_decay,
                      warmup_steps=warmup_steps,
                      warmup_start_lr=warmup_start_lr,
                      max_iter=max_iter,
                      power=power)
    hvd.broadcast_optimizer_state(optim.optim, root_rank=0)
    # wrap the inner SGD so gradients are allreduced across workers, but keep
    # the Optimizer wrapper so its lr schedule (and optim.lr) still applies
    optim.optim = hvd.DistributedOptimizer(optim.optim,
                                           named_parameters=net.named_parameters())

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = criteria_p(out, lb)
        loss2 = criteria_16(out16, lb)
        loss3 = criteria_32(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:.4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(it=it + 1,
                      max_it=max_iter,
                      lr=lr,
                      loss=loss_avg,
                      time=t_intv,
                      eta=eta)
            logger.info(msg)
            loss_avg = []
            st = ed

    ## dump the final model
    save_pth = osp.join(respth, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net,
                                               'module') else net.state_dict()
    if hvd.rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
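Examples #1, #3 and #4 pass `lr0`, `warmup_steps`, `max_iter` and `power` to a project-specific `Optimizer` wrapper that is not shown here; the call sites imply linear warmup followed by polynomial decay around a plain SGD optimizer, with the current rate exposed as `.lr`. A minimal, hypothetical sketch of that scheduling logic:

import torch

class Optimizer(object):
    # hypothetical reconstruction of the wrapper implied by the call sites:
    # linear warmup to lr0, then lr0 * (1 - t/T)^power polynomial decay
    def __init__(self, model, lr0, momentum, wd, warmup_steps,
                 warmup_start_lr, max_iter, power):
        self.lr0, self.power = lr0, power
        self.warmup_steps, self.warmup_start_lr = warmup_steps, warmup_start_lr
        self.max_iter = max_iter
        self.it = 0
        self.lr = warmup_start_lr
        self.optim = torch.optim.SGD(model.parameters(),
                                     lr=self.lr,
                                     momentum=momentum,
                                     weight_decay=wd)

    def get_lr(self):
        if self.it <= self.warmup_steps:
            k = self.it / self.warmup_steps  # linear warmup
            return self.warmup_start_lr + k * (self.lr0 - self.warmup_start_lr)
        frac = (self.it - self.warmup_steps) / (self.max_iter - self.warmup_steps)
        return self.lr0 * ((1 - frac) ** self.power)  # poly decay

    def zero_grad(self):
        self.optim.zero_grad()

    def step(self):
        self.lr = self.get_lr()
        for pg in self.optim.param_groups:
            pg['lr'] = self.lr
        self.optim.step()
        self.it += 1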