Example #1
def main(args):
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.num_gpus = num_gpus
    args.distributed = num_gpus > 1
    print(f'Using distributed: {args.distributed}')
    if args.distributed:
        print(f'Local rank: {args.local_rank}')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    val_loader = build_val_loader(args)
    model = build_model(args)
    state_dict = torch.load(args.checkpoint)['state_dict']
    state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
    model.load_state_dict(state_dict)
    device = torch.device('cuda')
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )
    test(args, model, val_loader, device)
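
All three examples rely on the same glue: the launcher exports WORLD_SIZE into the environment, args carries a local_rank field (plus seed and nested train/loss settings, presumably from a config file), and synchronize() acts as a barrier across ranks. The sketch below shows one common way that helper and the launch step look; it is an assumption, not the original implementation.

# Illustrative sketch only: synchronize() here is a typical implementation of
# the helper the examples call, not necessarily the original one.
import torch.distributed as dist


def synchronize():
    """Barrier across all processes; a no-op for single-GPU runs."""
    if not dist.is_available() or not dist.is_initialized():
        return
    if dist.get_world_size() == 1:
        return
    dist.barrier()

# Typical launch (assumption), which sets WORLD_SIZE and RANK for each process
# and passes --local_rank so it can end up on args.local_rank:
#   python -m torch.distributed.launch --nproc_per_node=4 train.py ...
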
Example #2
def main(args):
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.num_gpus = num_gpus
    args.distributed = num_gpus > 1
    print(f'Using distributed: {args.distributed}')
    if args.distributed:
        print(f'Local rank: {args.local_rank}')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    train_loader = build_train_loader(args)

    model = build_model(args)
    device = torch.device('cuda')
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False
        )

    if args.loss.get('custom', False):
        print('using WeightedBCELogitsLoss')
        criterion = WeightedBCELogitsLoss()
    else:
        criterion = torch.nn.BCEWithLogitsLoss()
    criterion.to(device)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.train.lr,
        momentum=args.train.momentum,
        weight_decay=args.train.weight_decay,
        nesterov=True
    )

    if args.resume:
        # checkpoint/optimizer resume logic is omitted in this example
        pass

    train(args, model, train_loader, criterion, optimizer, device)
Example #3
def main(args):
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.num_gpus = num_gpus
    args.distributed = num_gpus > 1
    print(f'Using distributed: {args.distributed}')
    if args.distributed:
        print(f'Local rank: {args.local_rank}')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    model = build_model(args)

    args.last_iter = -1
    if args.resume:
        checkpoint = torch.load(args.resume_path,
                                map_location=lambda storage, loc: storage)
        state_dict = {
            k.replace('module.', ''): v
            for k, v in checkpoint['state_dict'].items()
        }
        args.last_iter = checkpoint['iter']
        model.load_state_dict(state_dict)

    device = torch.device('cuda')
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.train.lr,
                                momentum=args.train.momentum,
                                weight_decay=args.train.weight_decay,
                                nesterov=True)
    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])

    if args.loss.get('cls_loss', None) == 'focal':
        gamma = args.loss.get('focal_gamma', 0)
        alpha = args.loss.get('focal_alpha', None)
        print(f'using focal loss with gamma {gamma} alpha {alpha}')
        criterion = FocalLoss(gamma=gamma, alpha=alpha)
    elif args.loss.get('cls_loss', None) == 'CE':
        # size_average is deprecated in recent PyTorch; reduction='mean'/'sum'
        # is the modern equivalent.
        criterion = torch.nn.CrossEntropyLoss(
            size_average=args.loss.cls_size_average)
    else:
        raise ValueError(
            f"unsupported cls_loss: {args.loss.get('cls_loss', None)}")

    criterion.to(device)

    if args.loss.get('kpt', None) == 'mse':
        kpt_criterion = torch.nn.MSELoss(
            size_average=args.loss.kpt_size_average)
        print('kpt using MSELoss')
    else:
        kpt_criterion = torch.nn.BCEWithLogitsLoss(
            size_average=args.loss.kpt_size_average)
    kpt_criterion.to(device)

    train_loader = build_train_loader(args)
    torch.cuda.empty_cache()
    train(args, model, train_loader, criterion, kpt_criterion, optimizer,
          device)
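
Example #3 switches to a focal loss for classification when args.loss.cls_loss is 'focal'. The FocalLoss class itself is not shown above; the sketch below is one common multi-class formulation, FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t), offered as an assumption about the interface rather than the repo's actual implementation. With gamma=0 and alpha=None it reduces to plain cross-entropy, matching the 'CE' branch.

# Hedged sketch of a multi-class focal loss; a standard formulation, not
# necessarily the FocalLoss used by the examples above.
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLoss(nn.Module):
    def __init__(self, gamma=0.0, alpha=None):
        super().__init__()
        self.gamma = gamma
        # alpha is assumed to be a per-class weight list/tensor, or None.
        self.alpha = None if alpha is None else torch.as_tensor(
            alpha, dtype=torch.float)

    def forward(self, logits, target):
        # logits: (N, C); target: (N,) with integer class indices.
        log_prob = F.log_softmax(logits, dim=1)
        log_pt = log_prob.gather(1, target.unsqueeze(1)).squeeze(1)
        pt = log_pt.exp()
        loss = -((1.0 - pt) ** self.gamma) * log_pt
        if self.alpha is not None:
            alpha_t = self.alpha.to(logits.device)[target]
            loss = alpha_t * loss
        return loss.mean()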