import os

import numpy as np
import torch


# Evaluation entry point: loads a checkpoint and runs the test loop.
def main(args):
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.num_gpus = num_gpus
    args.distributed = num_gpus > 1
    print(f'Using distributed: {args.distributed}')

    if args.distributed:
        print(f'Local rank: {args.local_rank}')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    val_loader = build_val_loader(args)

    model = build_model(args)
    # Strip the 'module.' prefix that DistributedDataParallel adds when saving.
    state_dict = torch.load(args.checkpoint)['state_dict']
    state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
    model.load_state_dict(state_dict)

    device = torch.device('cuda')
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    test(args, model, val_loader, device)
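# All three entry points call a synchronize() helper and are meant to be
# launched with torch.distributed.launch, which sets WORLD_SIZE and passes
# --local_rank. The helper is not shown in this section; below is a minimal
# sketch in the style of the common maskrcnn-benchmark barrier. This is an
# assumption, not this repo's actual implementation.
import torch.distributed as dist


def synchronize():
    """Barrier across all ranks; a no-op outside distributed runs (assumed helper)."""
    if not dist.is_available() or not dist.is_initialized():
        return
    if dist.get_world_size() == 1:
        return
    dist.barrier()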
# Training entry point: binary classification with (optionally weighted) BCE.
def main(args):
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.num_gpus = num_gpus
    args.distributed = num_gpus > 1
    print(f'Using distributed: {args.distributed}')

    if args.distributed:
        print(f'Local rank: {args.local_rank}')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    train_loader = build_train_loader(args)

    model = build_model(args)
    device = torch.device('cuda')
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    if args.loss.get('custom', False):
        print('using WeightedBCELogitsLoss')
        criterion = WeightedBCELogitsLoss()
    else:
        criterion = torch.nn.BCEWithLogitsLoss()
    criterion.to(device)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.train.lr,
        momentum=args.train.momentum,
        weight_decay=args.train.weight_decay,
        nesterov=True,
    )

    if args.resume:
        pass  # resume support is not implemented in this variant

    train(args, model, train_loader, criterion, optimizer, device)
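# WeightedBCELogitsLoss is project-specific and not defined in this section.
# A minimal sketch of one plausible implementation, assuming the intent is a
# BCE-with-logits loss averaged under per-element weights supplied with the
# batch; the class name comes from the code above, everything inside it is
# an assumption.
class WeightedBCELogitsLoss(torch.nn.Module):
    def forward(self, logits, targets, weights=None):
        # Per-element BCE, then an optional weighted average.
        loss = torch.nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='none')
        if weights is None:
            return loss.mean()
        return (loss * weights).sum() / weights.sum().clamp(min=1e-8)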
# Training entry point with resume support and a separate keypoint loss.
def main(args):
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.num_gpus = num_gpus
    args.distributed = num_gpus > 1
    print(f'Using distributed: {args.distributed}')

    if args.distributed:
        print(f'Local rank: {args.local_rank}')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    model = build_model(args)
    args.last_iter = -1
    if args.resume:
        # Map the checkpoint to CPU so resuming does not spike GPU memory.
        checkpoint = torch.load(args.resume_path,
                                map_location=lambda storage, loc: storage)
        state_dict = {
            k.replace('module.', ''): v
            for k, v in checkpoint['state_dict'].items()
        }
        args.last_iter = checkpoint['iter']
        model.load_state_dict(state_dict)

    device = torch.device('cuda')
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.train.lr,
        momentum=args.train.momentum,
        weight_decay=args.train.weight_decay,
        nesterov=True,
    )
    if args.resume:
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Classification loss: focal loss or plain cross-entropy. The deprecated
    # size_average flag is translated to the equivalent reduction argument.
    if args.loss.get('cls_loss', None) == 'focal':
        gamma = args.loss.get('focal_gamma', 0)
        alpha = args.loss.get('focal_alpha', None)
        print(f'using focal loss with gamma {gamma} alpha {alpha}')
        criterion = FocalLoss(gamma=gamma, alpha=alpha)
    elif args.loss.get('cls_loss', None) == 'CE':
        criterion = torch.nn.CrossEntropyLoss(
            reduction='mean' if args.loss.cls_size_average else 'sum')
    else:
        # Fail fast instead of hitting a NameError on criterion below.
        raise ValueError(f"unsupported cls_loss: {args.loss.get('cls_loss', None)}")
    criterion.to(device)

    # Keypoint loss: MSE on heatmaps, or BCE-with-logits by default.
    if args.loss.get('kpt', None) == 'mse':
        kpt_criterion = torch.nn.MSELoss(
            reduction='mean' if args.loss.kpt_size_average else 'sum')
        print('kpt using MSELoss')
    else:
        kpt_criterion = torch.nn.BCEWithLogitsLoss(
            reduction='mean' if args.loss.kpt_size_average else 'sum')
    kpt_criterion.to(device)

    train_loader = build_train_loader(args)
    torch.cuda.empty_cache()

    train(args, model, train_loader, criterion, kpt_criterion, optimizer, device)
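# FocalLoss is likewise referenced but not defined here. A minimal sketch of
# the standard multi-class focal loss (Lin et al., 2017) matching the
# gamma/alpha constructor used above; the actual implementation in this repo
# may differ.
class FocalLoss(torch.nn.Module):
    def __init__(self, gamma=0, alpha=None):
        super().__init__()
        self.gamma = gamma
        # alpha may be a scalar weight or a per-class weight sequence.
        self.alpha = None if alpha is None else torch.tensor(alpha)

    def forward(self, logits, targets):
        # Per-sample cross-entropy, down-weighted for easy examples
        # by the modulating factor (1 - p_t) ** gamma.
        log_pt = torch.nn.functional.log_softmax(logits, dim=1)
        log_pt = log_pt.gather(1, targets.unsqueeze(1)).squeeze(1)
        pt = log_pt.exp()
        loss = -((1 - pt) ** self.gamma) * log_pt
        if self.alpha is not None:
            at = self.alpha.to(logits.device)
            # A scalar alpha scales uniformly; a vector indexes per class.
            loss = loss * (at if at.dim() == 0 else at[targets])
        return loss.mean()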