import torch

# `MobileNet` and `args` are assumed to be defined by the surrounding training
# script; they are not part of this snippet.

class WeightEMA(object):
    """Keeps an exponential moving average (EMA) of a model's weights."""

    def __init__(self, model, ema_model, alpha=0.999):
        self.model = model
        self.ema_model = ema_model
        self.alpha = alpha
        # Scratch copy used only to stash the EMA weights while syncing
        # BatchNorm statistics; it must match the model architecture
        # (hardcoded to 16 classes here).
        self.tmp_model = MobileNet(num_classes=16).cuda()
        self.wd = 0.02 * args.lr
        # Initialize the EMA model from the current student weights.
        for param, ema_param in zip(self.model.parameters(), self.ema_model.parameters()):
            ema_param.data.copy_(param.data)

    def step(self, bn=False):
        if bn:
            # Copy BatchNorm running stats from the student into the EMA model:
            # stash the EMA parameters, load the full student state dict
            # (parameters + buffers), then restore the EMA parameters so that
            # only the BN buffers are taken from the student.
            for ema_param, tmp_param in zip(self.ema_model.parameters(), self.tmp_model.parameters()):
                tmp_param.data.copy_(ema_param.data.detach())
            self.ema_model.load_state_dict(self.model.state_dict())
            for ema_param, tmp_param in zip(self.ema_model.parameters(), self.tmp_model.parameters()):
                ema_param.data.copy_(tmp_param.data.detach())
        else:
            one_minus_alpha = 1.0 - self.alpha
            for param, ema_param in zip(self.model.parameters(), self.ema_model.parameters()):
                # ema = alpha * ema + (1 - alpha) * param
                ema_param.data.mul_(self.alpha)
                ema_param.data.add_(param.data.detach() * one_minus_alpha)
                # Customized weight decay, applied to the student weights.
                param.data.mul_(1 - self.wd)
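# A minimal sketch of how WeightEMA is typically driven (assumed, not part of
# the original snippet): `train_one_epoch` is a hypothetical helper, and
# `optimizer`, `criterion`, and `train_loader` stand in for the usual training
# objects. The EMA update runs after every optimizer step; the BN sync runs
# once before the EMA model is evaluated.
def train_one_epoch(model, optimizer, ema_optimizer, criterion, train_loader):
    model.train()
    for inputs, targets in train_loader:
        loss = criterion(model(inputs.cuda()), targets.cuda())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ema_optimizer.step()  # EMA update after every gradient step
    # Sync BatchNorm running stats into the EMA model before evaluation.
    ema_optimizer.step(bn=True)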
def create_model(num_classes, ema=False):
    model = MobileNet(num_classes)
    # model = WideResNet(num_classes)  # alternative backbone
    model = torch.nn.DataParallel(model).cuda()  # for a single-GPU run, use model.cuda() instead
    if ema:
        # The EMA model is never trained through autograd; WeightEMA updates
        # it in place, so its parameters are detached from the graph.
        for param in model.parameters():
            param.detach_()
            # param.requires_grad = False
    return model
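# Assumed wiring (not part of the original snippet): build a student model and
# a detached EMA copy, then let WeightEMA keep the copy in sync. The optimizer
# and loss below are typical choices, not taken from the source, and
# `train_loader` is assumed to be defined elsewhere.
if __name__ == "__main__":
    num_classes = 16
    model = create_model(num_classes)
    ema_model = create_model(num_classes, ema=True)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    ema_optimizer = WeightEMA(model, ema_model, alpha=0.999)
    train_one_epoch(model, optimizer, ema_optimizer, criterion, train_loader)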