def create_opt(parameters, config):
    """Build a torch optimizer for ``parameters`` according to ``config.opt``.

    Parameters
    ----------
    parameters : iterable
        Model parameters to optimize (e.g. ``model.parameters()``).
    config : object
        Namespace providing ``opt`` (one of "SGD", "Adam", "Adadelta",
        "Adagrad", "AdamW"), ``lr`` (learning rate) and ``l2`` (weight
        decay).  ``rho`` and ``eps`` are read only when ``opt`` is
        "Adadelta".

    Returns
    -------
    torch.optim.Optimizer
        The configured optimizer instance.

    Raises
    ------
    ValueError
        If ``config.opt`` is not a supported optimizer name.  (Previously
        an unrecognized name fell through every branch and the trailing
        ``return optimizer`` raised ``UnboundLocalError`` instead.)
    """
    if config.opt == "SGD":
        optimizer = optim.SGD(parameters, lr=config.lr, weight_decay=config.l2)
    elif config.opt == "Adam":
        optimizer = optim.Adam(parameters, lr=config.lr, weight_decay=config.l2)
    elif config.opt == "Adadelta":
        optimizer = optim.Adadelta(parameters, lr=config.lr, rho=config.rho,
                                   eps=config.eps, weight_decay=config.l2)
    elif config.opt == "Adagrad":
        optimizer = optim.Adagrad(parameters, lr=config.lr, weight_decay=config.l2)
    elif config.opt == "AdamW":
        print("Using AdamW")
        # NOTE(review): bare `AdamW` comes from an import outside this block
        # (apex or transformers, presumably) — confirm which implementation.
        optimizer = AdamW(parameters, lr=config.lr, weight_decay=config.l2)
    else:
        # Fail loudly on a typo'd optimizer name instead of letting an
        # unbound local escape at the return below.
        raise ValueError("Unsupported optimizer: {!r}".format(config.opt))
    return optimizer
# Copy every checkpoint tensor whose name AND shape match into the teacher's
# state dict; mismatched entries keep the freshly-initialized weights.
# (`sd` and `loaded_dict` are defined before this fragment — not visible here.)
for k in model_t_loc.state_dict():
    if k in loaded_dict and sd[k].size() == loaded_dict[k].size():
        sd[k] = loaded_dict[k]
loaded_dict = sd
model_t_loc.load_state_dict(loaded_dict)
# named_parameters() yields (name, tensor) pairs: the key is the module/parameter
# name and the value is the parameter tensor, so individual modules could be
# frozen selectively by matching on the name.  Here ALL teacher parameters are
# frozen — the teacher is inference-only during knowledge distillation.
for key, value in model_t_loc.named_parameters():
    value.requires_grad = False
# Free the checkpoint copies before building optimizers to reduce peak memory.
del loaded_dict
del sd
del checkpoint

if args.mode != "onlyT":
    # Student training: optimize the student model's parameters.
    params = model_s.parameters()
    optimizer = AdamW(params, lr=args.lr, weight_decay=args.weight_decay)
    # opt_level="O0" keeps full FP32 (apex amp used only for its API, no mixed precision).
    model_s, optimizer = amp.initialize(model_s, optimizer, opt_level="O0")
    # Halve the LR at each listed epoch milestone.
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[5, 11, 17, 23, 29, 33, 47, 50, 60, 70, 90, 110, 130, 150, 170, 180, 190],
        gamma=0.5)
else:
    # Teacher-only training: optimize the teacher model's parameters instead.
    params = model_t.parameters()
    optimizer = AdamW(params, lr=args.lr, weight_decay=args.weight_decay)
    model_t, optimizer = amp.initialize(model_t, optimizer, opt_level="O0")
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[5, 11, 17, 23, 29, 33, 47, 50, 60, 70, 90, 110, 130, 150, 170, 180, 190],
        gamma=0.5)

# Optionally warm-start the student from a previous best KD checkpoint
# (only meaningful when actually training the student).
if args.transfer and args.mode != 'onlyT':
    snap_to_load = 'res50_loc_{}_KD_best'.format(seed)
    print("=> loading checkpoint '{}'".format(snap_to_load))
    # map_location='cpu' avoids allocating GPU memory while reading the file.
    checkpoint = torch.load(path.join(models_folder, snap_to_load), map_location='cpu')
    loaded_dict = checkpoint['state_dict']
    sd = model_s.state_dict()
    # Same name-and-shape-matched copy as above, now into the student.
    # NOTE(review): the loop body continues beyond this visible fragment.
    for k in model_s.state_dict():
# Tail of a DataLoader(...) call whose opening line is outside this fragment:
# training loader shuffles and drops the last partial batch for stable batch-norm.
batch_size=batch_size, num_workers=5, shuffle=True, pin_memory=False, drop_last=True)
# Validation loader: fixed order, keep every sample.
val_data_loader = DataLoader(val_train, batch_size=val_batch_size, num_workers=5,
                             shuffle=False, pin_memory=False)
# SE-ResNeXt50 U-Net for building localization, trained on GPU.
model = SeResNext50_Unet_Loc().cuda()
params = model.parameters()
optimizer = AdamW(params, lr=0.00015, weight_decay=1e-6)
# opt_level="O1" enables apex mixed-precision (FP16 where safe).
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
# Halve the LR at each listed epoch milestone.
scheduler = lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[15, 29, 43, 53, 65, 80, 90, 100, 110, 130, 150, 170, 180, 190],
    gamma=0.5)
# Segmentation loss: Dice + 10x focal, averaged over the whole batch
# (per_image=False) rather than per image.
seg_loss = ComboLoss({'dice': 1.0, 'focal': 10.0}, per_image=False).cuda()
best_score = 0      # best validation metric seen so far (for checkpointing)
_cnt = -1           # step/iteration counter, pre-incremented before first use
torch.cuda.empty_cache()