grad_scale = grad_scale / 8.0
logger("training phase")
for batch_idx, (data, target) in enumerate(train_loader):
    indx_target = target.clone()
    if args.cuda:
        data, target = data.cuda(), target.cuda()
    data, target = Variable(data), Variable(target)
    optimizer.zero_grad()
    output = model(data)
    loss = wage_util.SSE(output, target)
    loss.backward()
    # WAGE: quantize gradients to wl_grad bits before the optimizer step
    for name, param in list(model.named_parameters())[::-1]:
        param.grad.data = wage_quantizer.QG(param.grad.data, args.wl_grad, grad_scale)
    optimizer.step()
    # clip updated weights back to the wl_weight-bit range
    for name, param in list(model.named_parameters())[::-1]:
        param.data = wage_quantizer.C(param.data, args.wl_weight)
    if batch_idx % args.log_interval == 0 and batch_idx > 0:
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct = pred.cpu().eq(indx_target).sum()
        acc = float(correct) * 1.0 / len(data)
        logger('Train Epoch: {} [{}/{}] Loss: {:.6f} Acc: {:.4f} lr: {:.2e}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            loss.data, acc, optimizer.param_groups[0]['lr']))
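# --- Hypothetical sketch (not wage_quantizer's actual code) ---------------
# wage_quantizer.QG above quantizes gradients in the WAGE style: rescale the
# gradient to a unit range, then stochastically round it to wl_grad bits.
# The function below is a minimal illustration of that idea only; the name,
# the exact scaling, and the clamp bounds are assumptions, not the
# library's API.
import torch

def sketch_quantize_grad(grad, wl_grad, grad_scale):
    step = 2.0 ** (1 - wl_grad)                      # smallest step at wl_grad bits
    g = grad_scale * grad / grad.abs().max().clamp(min=1e-12)
    floor = torch.floor(g / step)
    frac = g / step - floor                          # fractional part in [0, 1)
    rounded = floor + torch.bernoulli(frac)          # stochastic rounding
    return (rounded * step).clamp(-1 + step, 1 - step)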
for batch_idx, (data, target) in enumerate(train_loader):
    indx_target = target.clone()
    if args.cuda:
        data, target = data.cuda(), target.cuda()
    data, target = Variable(data), Variable(target)
    optimizer.zero_grad()
    output = model(data)
    loss = wage_util.SSE(output, target)
    loss.backward()
    # introduce non-ideal device properties into the weight update
    j = 0
    for name, param in list(model.named_parameters())[::-1]:
        # momentum buffer per parameter (SGD with momentum applied manually)
        velocity[j] = gamma * velocity[j] + alpha * param.grad.data
        param.grad.data = velocity[j]
        # quantize the gradient under asymmetric LTP/LTD device nonlinearity
        param.grad.data = wage_quantizer.QG(
            param.data, args.wl_weight, param.grad.data, args.wl_grad, grad_scale,
            torch.from_numpy(paramALTP[j]).cuda(), torch.from_numpy(paramALTD[j]).cuda(),
            args.max_level, args.max_level)
        j += 1
    optimizer.step()
    # apply the weight update with cycle-to-cycle variation (c2cVari)
    for name, param in list(model.named_parameters())[::-1]:
        param.data = wage_quantizer.W(param.data, param.grad.data, args.wl_weight, args.c2cVari)
    if batch_idx % args.log_interval == 0 and batch_idx > 0:
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct = pred.cpu().eq(indx_target).sum()
        acc = float(correct) * 1.0 / len(data)
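# --- Hypothetical sketch (not the framework's actual code) ----------------
# paramALTP / paramALTD above parameterize the asymmetric nonlinearity of a
# synaptic device: potentiation (LTP) and depression (LTD) follow saturating
# exponential curves, so the same number of programming pulses moves the
# weight by different amounts depending on direction and current state.
# A minimal model of one branch, under these assumptions:
import numpy as np

def sketch_device_conductance(pulses, nonlinearity, max_level):
    """Normalized conductance after `pulses` identical pulses (0..max_level)."""
    A = max_level / nonlinearity                 # curvature of the update trace
    B = 1.0 / (1.0 - np.exp(-max_level / A))     # normalize the range to [0, 1]
    return B * (1.0 - np.exp(-pulses / A))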