def stylize(args):
    img = None
    content_image = utils.tensor_load_rgbimage(args.content_image, scale=args.content_scale)
    content_image = content_image.unsqueeze(0)
    style_model = TransformerNet()
    style_model.load_state_dict(torch.load(args.model))
    cam = cv2.VideoCapture(0)
    for x in range(0, 150):
        ret_val, img13 = cam.read()
        content_image = utils.tensor_load_rgbimage_cam(img13, scale=args.content_scale)
        content_image = content_image.unsqueeze(0)
        if args.cuda:
            content_image = content_image.cuda()
        content_image2 = Variable(utils.preprocess_batch(content_image), volatile=True)
        if args.cuda:
            style_model.cuda()
        output = style_model(content_image2)
        im = utils.tensor_ret_bgrimage(output.data[0], args.output_image, args.cuda)
        if img is None:
            img = pl.imshow(im)
        else:
            img.set_data(im)
        pl.pause(.1)
        pl.draw()
def stylize(**kwargs):
    """Generate a picture in the style of the style image."""
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    content_image = tv.datasets.folder.default_loader(opt.content_path)
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),                # scale values to [0, 1]
        tv.transforms.Lambda(lambda x: x * 255)  # scale values to [0, 255]
    ])
    content_image = content_transform(content_image)
    content_image = Variable(content_image.unsqueeze(0), volatile=True)
    style_model = TransformerNet().eval()  # switch to eval mode
    style_model.load_state_dict(
        t.load(opt.model_path, map_location=lambda _s, _: _s))
    if opt.use_gpu:
        content_image = content_image.cuda()
        style_model.cuda()
    output = style_model(content_image)
    output_data = output.cpu().data[0]
    tv.utils.save_image((output_data / 255).clamp(min=0, max=1), opt.result_path)
def stylize(args):
    content_image = utils.load_image(args.content_image, scale=args.content_scale)
    content_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)
    if args.cuda:
        content_image = content_image.cuda()
    with torch.no_grad():
        content_image = Variable(content_image)  # hotfix for PyTorch > 0.4.0

    model_dict = torch.load(args.model)
    model_dict_clone = model_dict.copy()  # we can't mutate while iterating
    for key, value in model_dict_clone.items():
        if key.endswith(('running_mean', 'running_var')):
            del model_dict[key]

    style_model = TransformerNet()
    style_model.load_state_dict(model_dict, False)
    # original: style_model.load_state_dict(torch.load(args.model))
    if args.cuda:
        style_model.cuda()
    output = style_model(content_image)
    if args.cuda:
        output = output.cpu()
    output_data = output.data[0]
    utils.save_image(args.output_image, output_data)
def stylize(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    # Image preprocessing
    content_image = tv.datasets.folder.default_loader(opt.content_path)
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),                   # scale to [0, 1]
        tv.transforms.Lambda(lambda x: x.mul(255))  # scale to [0, 255]
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)  # add the batch dimension
    content_image = Variable(content_image, volatile=True)

    # Model
    style_model = TransformerNet().eval()
    style_model.load_state_dict(
        t.load(opt.model_path, map_location=lambda _s, _: _s))
    if opt.use_gpu:
        content_image = content_image.cuda()
        style_model.cuda()

    # Style transfer and saving: run the image through the transformer network
    output = style_model(content_image)
    output_data = output.cpu().data[0]
    # rescale to [0, 1] and save the image
    tv.utils.save_image((output_data / 255).clamp(min=0, max=1), opt.result_path)
def evaluate(args):
    # device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')
    model = TransformerNet()
    state_dict = torch.load(args.model)
    if args.gpus is not None:
        model = nn.DataParallel(model, device_ids=args.gpus)
    else:
        model = nn.DataParallel(model)
    model.load_state_dict(state_dict)
    if args.cuda:
        model.cuda()
    with torch.no_grad():
        for root, dirs, filenames in os.walk(args.input_dir):
            for filename in filenames:
                if utils.is_image_file(filename):
                    impath = osp.join(root, filename)
                    img = utils.load_image(impath)
                    img = img.unsqueeze(0)
                    if args.cuda:
                        img = img.cuda()  # .cuda() is not in-place; the result must be assigned
                    rec_img = model(img)
                    if args.cuda:
                        rec_img = rec_img.cpu()
                        img = img.cpu()
                    save_path = osp.join(args.output_dir, filename)
                    # utils.save_image(rec_img[0], save_path)
                    utils.save_image_preserv_length(rec_img[0], img[0], save_path)
def stylize(args):
    content_image = utils.load_image(args.content_image, scale=args.content_scale)
    content_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0).cuda()

    if args.model.endswith(".onnx"):
        output = stylize_onnx_caffe2(content_image, args)
    else:
        with torch.no_grad():
            style_model = TransformerNet()
            state_dict = torch.load(args.model)
            # remove saved deprecated running_* keys in InstanceNorm from the checkpoint
            print(state_dict.keys())
            for k in list(state_dict.keys()):
                if re.search(r'in\d+\.running_(mean|var)$', k):
                    del state_dict[k]
            style_model.load_state_dict(state_dict)
            style_model.cuda()
            if args.export_onnx:
                assert args.export_onnx.endswith(".onnx"), \
                    "Export model file should end with .onnx"
                output = torch.onnx._export(style_model, content_image, args.export_onnx).cpu()
            else:
                output = style_model(content_image).cpu()
    utils.save_image(args.output_image, output[0])
def stylize(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    # Image preprocessing
    content_image = tv.datasets.folder.default_loader(opt.content_path)
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, volatile=True)

    # Model
    style_model = TransformerNet().eval()
    style_model.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))
    if opt.use_gpu:
        content_image = content_image.cuda()
        style_model.cuda()

    # Style transfer and saving
    output = style_model(content_image)
    output_data = output.cpu().data[0]
    tv.utils.save_image((output_data / 255).clamp(min=0, max=1), opt.result_path)
def stylize(args):
    if args.model.endswith(".onnx"):
        return stylize_onnx_caffe2(args)

    content_image = utils.tensor_load_rgbimage(args.content_image, scale=args.content_scale)
    content_image = content_image.unsqueeze(0)
    if args.cuda:
        content_image = content_image.cuda()
    content_image = Variable(utils.preprocess_batch(content_image), requires_grad=False)

    style_model = TransformerNet()
    state_dict = torch.load(args.model)
    # rename old InstanceNorm parameters (scale/shift) to the current names (weight/bias)
    in_names = [
        "in1.scale", "in1.shift", "in2.scale", "in2.shift", "in3.scale", "in3.shift",
        "res1.in1.scale", "res1.in1.shift", "res1.in2.scale", "res1.in2.shift",
        "res2.in1.scale", "res2.in1.shift", "res2.in2.scale", "res2.in2.shift",
        "res3.in1.scale", "res3.in1.shift", "res3.in2.scale", "res3.in2.shift",
        "res4.in1.scale", "res4.in1.shift", "res4.in2.scale", "res4.in2.shift",
        "res5.in1.scale", "res5.in1.shift", "res5.in2.scale", "res5.in2.shift",
        "in4.scale", "in4.shift", "in5.scale", "in5.shift"
    ]
    for k in in_names:
        state_dict[k.replace("scale", "weight").replace("shift", "bias")] = state_dict.pop(k)
    style_model.load_state_dict(state_dict)

    if args.cuda:
        style_model.cuda()
    if args.half:
        style_model.half()
        content_image = content_image.half()
    if args.export_onnx:
        assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx"
        output = torch.onnx._export(style_model, content_image, args.export_onnx)
    else:
        output = style_model(content_image)
    if args.half:
        output = output.float()
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def stylize(args):
    content_image = utils.tensor_load_rgbimage(args.content_image, scale=args.content_scale)
    content_image = content_image.unsqueeze(0)
    if args.cuda:
        content_image = content_image.cuda()
    content_image = Variable(utils.preprocess_batch(content_image), volatile=True)
    style_model = TransformerNet()
    style_model.load_state_dict(torch.load(args.model))
    if args.cuda:
        style_model.cuda()
    output = style_model(content_image)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def stylize(content_image, model, content_scale=None, cuda=0):
    content_image = utils.tensor_load_rgbimage(content_image, scale=content_scale)
    content_image = content_image.unsqueeze(0)
    if cuda:
        content_image = content_image.cuda()
    content_image = Variable(utils.preprocess_batch(content_image), volatile=True)
    style_model = TransformerNet()
    style_model.load_state_dict(torch.load(model))
    if cuda:
        style_model.cuda()
    output = style_model(content_image)
    return utils.tensor_to_Image(output, cuda)
def stylize(args):
    content_image = np.loadtxt(args.content_image)
    upsample_ratio = 8
    batch_size = 100
    num_images = content_image.shape[1]
    num_batch = int(content_image.shape[1] / batch_size)

    # load the model once, outside the batch loop
    style_model = TransformerNet()
    style_model.load_state_dict(torch.load(args.model))
    if args.cuda:
        style_model.cuda()

    output_model_total = []
    for batch_id in range(num_batch):
        print('[{}]/[{}] iters '.format(batch_id + 1, num_batch))
        x = content_image[:, batch_id * batch_size:(batch_id + 1) * batch_size]
        x = x.transpose()
        x = x.reshape((-1, 1, args.image_size_x, args.image_size_y))
        x = torch.from_numpy(x).float()
        if args.cuda:
            x = x.cuda()
        x = Variable(x, volatile=True)
        output_model = style_model(x)
        output_model = output_model.data
        output_image = output_model.repeat(1, 3, 1, 1)
        output_model = output_model.cpu().numpy().astype(float)
        output_model = output_model.reshape(
            (batch_size, args.image_size_x * args.image_size_y * upsample_ratio ** 2))
        output_model = output_model.transpose()
        output_model_total.append(output_model)

    output_model_total = np.hstack(output_model_total)
    np.savetxt(args.output_model, output_model_total)
def stylize(args):
    content_image = utils.load_image(args.content_image, scale=args.content_scale)
    content_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)
    if args.cuda:
        content_image = content_image.cuda()
    content_image = Variable(content_image, volatile=True)
    style_model = TransformerNet()
    style_model.load_state_dict(torch.load(args.model))
    if args.cuda:
        style_model.cuda()
    output = style_model(content_image)
    if args.cuda:
        output = output.cpu()
    output_data = output.data[0]
    utils.save_image(args.output_image, output_data)
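For reference, a minimal sketch of how a stylize(args) variant like the one above might be driven from the command line. The flag names mirror the attributes these functions read (content_image, content_scale, model, output_image, cuda), but this CLI wrapper is an assumption, not part of the original snippets.

# Hypothetical driver for the stylize(args) variants above (an assumption,
# not code from the original repositories).
import argparse

parser = argparse.ArgumentParser(description="fast-neural-style: stylize an image")
parser.add_argument("--content-image", required=True, help="path to the content image")
parser.add_argument("--content-scale", type=float, default=None, help="optional downscale factor")
parser.add_argument("--model", required=True, help="path to a saved TransformerNet state dict")
parser.add_argument("--output-image", required=True, help="where to save the stylized result")
parser.add_argument("--cuda", type=int, default=1, help="1 to run on GPU, 0 for CPU")
args = parser.parse_args()
stylize(args)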
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 12, 'pin_memory': False}
    else:
        kwargs = {}

    from transform.color_op import Linearize, SRGB2XYZ, XYZ2CIE
    RGB2YUV = transforms.Compose([Linearize(), SRGB2XYZ(), XYZ2CIE()])

    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        RGB2YUV,  # already a Compose instance; calling RGB2YUV() here would raise a TypeError
        transforms.ToTensor(),
        # transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, **kwargs)

    transformer = TransformerNet(in_channels=2, out_channels=1)  # input: LS, predict: M
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    transformer = nn.DataParallel(transformer)
    if args.cuda:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA is requested, but related driver/device is not set properly.")
        transformer.cuda()

    for e in range(args.epochs):
        transformer.train()
        count = 0
        for batch_id, (imgs, _) in enumerate(train_loader):
            n_batch = len(imgs)
            count += n_batch
            optimizer.zero_grad()
            # first and last channels (L, S)
            x = torch.cat([imgs[:, :1, :, :].clone(), imgs[:, -1:, :, :].clone()], dim=1)
            # middle channel (M)
            gt = imgs[:, 1:2, :, :].clone()
            if args.cuda:
                x = x.cuda()
                gt = gt.cuda()
            y = transformer(x)
            total_loss = mse_loss(y, gt)
            total_loss.backward()
            optimizer.step()
            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    total_loss / (batch_id + 1))
                print(mesg)

    # save model
    transformer.eval()
    transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    os.makedirs(args.save_model_dir, exist_ok=True)
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    transform = transforms.Compose([
        transforms.Scale(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    transformer = TransformerNet()
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(requires_grad=False)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1)

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        style = style.cuda()

    style_v = Variable(style)
    style_v = utils.normalize_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            x = Variable(x)
            if args.cuda:
                x = x.cuda()

            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2)

            style_loss = 0
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1)
                )
                print(mesg)
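The training loops in this section all build their style targets with utils.gram_matrix, whose implementation is not shown here. A minimal sketch of the usual definition, matching the upstream fast-neural-style example: the per-sample Gram matrix of a (b, ch, h, w) feature batch, normalized by ch * h * w.

import torch

def gram_matrix(y):
    # y: (b, ch, h, w) feature batch -> (b, ch, ch) Gram matrices,
    # each normalized by the number of elements per channel map.
    (b, ch, h, w) = y.size()
    features = y.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch * h * w)
    return gram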
def train(**kwargs):
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    vis = utils.Visualizer(opt.env)

    # Data loading
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    loader = get_loader(batch_size=1, data_path=opt.data_path,
                        img_shape=opt.img_shape, transform=transform)

    # Transformer network
    transformer = TransformerNet().cuda()
    # if opt.model_path:
    #     transformer.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))

    # Loss networks: VGG and a depth network
    vgg = Vgg19().eval()
    depthnet = HourGlass().eval()
    depthnet.load_state_dict(t.load(opt.depth_path))

    # BASNet (salient-object segmentation)
    net = BASNet(3, 1).cuda()
    net.load_state_dict(torch.load('./basnet.pth'))
    net.eval()

    # Optimizer
    optimizer = t.optim.Adam(transformer.parameters(), lr=opt.lr)

    # Load the style image
    img = Image.open(opt.style_path)
    img = img.resize(opt.img_shape)
    img = transform(img).float()
    style = Variable(img, requires_grad=True).unsqueeze(0)
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()
        depthnet.cuda()

    # Gram matrices of the style image
    style_v = Variable(style, volatile=True)
    features_style = vgg(style_v)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]

    # Loss statistics
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()
    temporal_meter = tnt.meter.AverageValueMeter()
    long_temporal_meter = tnt.meter.AverageValueMeter()
    depth_meter = tnt.meter.AverageValueMeter()

    kk = 0
    for count in range(opt.epoch):
        print('Training Start!!')
        content_meter.reset()
        style_meter.reset()
        temporal_meter.reset()
        long_temporal_meter.reset()
        depth_meter.reset()
        for step, frames in enumerate(loader):
            for i in tqdm.tqdm(range(1, len(frames))):
                kk += 1
                if (kk + 1) % 3000 == 0:
                    print('LR had changed')
                    for param in optimizer.param_groups:
                        param['lr'] = max(param['lr'] / 1.2, 1e-4)
                optimizer.zero_grad()
                x_t = frames[i].cuda()
                x_t1 = frames[i - 1].cuda()
                h_xt = transformer(x_t)
                h_xt1 = transformer(x_t1)
                depth_x_t = depthnet(x_t)
                depth_x_t1 = depthnet(x_t1)
                depth_h_xt = depthnet(h_xt)
                depth_h_xt1 = depthnet(h_xt1)
                img1 = h_xt1.data.cpu().squeeze(0).numpy().transpose(1, 2, 0)
                img2 = h_xt.data.cpu().squeeze(0).numpy().transpose(1, 2, 0)
                flow, mask = opticalflow(img1, img2)
                d1, d2, d3, d4, d5, d6, d7, d8 = net(x_t)
                a1pha1 = PROCESS(d1, x_t)
                del d1, d2, d3, d4, d5, d6, d7, d8
                d1, d2, d3, d4, d5, d6, d7, d8 = net(x_t1)
                a1pha2 = PROCESS(d1, x_t1)
                del d1, d2, d3, d4, d5, d6, d7, d8
                h_xt_features = vgg(h_xt)
                h_xt1_features = vgg(h_xt1)
                x_xt_features = vgg(a1pha1)
                x_xt1_features = vgg(a1pha2)

                # Content loss (conv3_2)
                content_t = F.mse_loss(x_xt_features[2], h_xt_features[2])
                content_t1 = F.mse_loss(x_xt1_features[2], h_xt1_features[2])
                content_loss = opt.content_weight * (content_t1 + content_t)

                # Style loss
                style_t = 0
                style_t1 = 0
                for ft_y, gm_s in zip(h_xt_features, gram_style):
                    gram_y = gram_matrix(ft_y)
                    style_t += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
                for ft_y, gm_s in zip(h_xt1_features, gram_style):
                    gram_y = gram_matrix(ft_y)
                    style_t1 += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
                style_loss = opt.style_weight * (style_t1 + style_t)

                # Depth loss
                depth_loss1 = F.mse_loss(depth_h_xt, depth_x_t)
                depth_loss2 = F.mse_loss(depth_h_xt1, depth_x_t1)
                depth_loss = opt.depth_weight * (depth_loss1 + depth_loss2)

                # Long-temporal loss: keep a key frame every sample_frames steps
                if (i - 1) % opt.sample_frames == 0:
                    frames0 = h_xt1.cpu()
                long_img1 = frames0.data.cpu().squeeze(0).numpy().transpose(1, 2, 0)
                long_flow, long_mask = opticalflow(long_img1, img2)

                # Optical-flow warping
                flow = torch.from_numpy(flow).permute(2, 0, 1).unsqueeze(0).to(torch.float32)
                long_flow = torch.from_numpy(long_flow).permute(2, 0, 1).unsqueeze(0).to(torch.float32)
                warped = warp(h_xt1.cpu().permute(0, 2, 3, 1), flow,
                              opt.img_shape[1], opt.img_shape[0]).cuda()
                long_warped = warp(frames0.cpu().permute(0, 2, 3, 1), long_flow,
                                   opt.img_shape[1], opt.img_shape[0]).cuda()
                long_temporal_loss = F.mse_loss(h_xt, long_mask * long_warped.permute(0, 3, 1, 2))
                mask = mask.transpose(2, 0, 1)
                mask = torch.from_numpy(mask).cuda().to(torch.float32)
                temporal_loss = F.mse_loss(h_xt, mask * warped.permute(0, 3, 1, 2))
                temporal_loss = opt.temporal_weight * temporal_loss
                long_temporal_loss = opt.long_temporal_weight * long_temporal_loss

                # Spatial loss
                spatial_loss = content_loss + style_loss
                Loss = spatial_loss + depth_loss + temporal_loss + long_temporal_loss
                Loss.backward(retain_graph=True)
                optimizer.step()

                content_meter.add(float(content_loss.data))
                style_meter.add(float(style_loss.data))
                temporal_meter.add(float(temporal_loss.data))
                long_temporal_meter.add(float(long_temporal_loss.data))
                depth_meter.add(float(depth_loss.data))
                vis.plot('temporal_loss', temporal_meter.value()[0])
                vis.plot('long_temporal_loss', long_temporal_meter.value()[0])
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                vis.plot('depth_loss', depth_meter.value()[0])

                if i % 10 == 0:
                    vis.img('input(t)', (x_t.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                    vis.img('output(t)', (h_xt.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                    vis.img('output(t-1)', (h_xt1.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                    print('epoch {}, content loss: {}, style loss: {}, temporal loss: {}, '
                          'long temporal loss: {}, depth loss: {}, total loss: {}'
                          .format(count, content_loss, style_loss, temporal_loss,
                                  long_temporal_loss, depth_loss, Loss))
        vis.save([opt.env])
        torch.save(transformer.state_dict(), opt.model_path)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    kwargs = {'num_workers': 0, 'pin_memory': False}

    transform = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size)),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = dataset.CustomImageDataset(args.dataset, transform=transform,
                                               img_size=args.image_size)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, **kwargs)

    transformer = TransformerNet(args.pad_type)
    transformer = transformer.train()
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    # print(transformer)

    vgg = Vgg16()
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    vgg.eval()

    transformer = transformer.cuda()
    vgg = vgg.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))

    style = utils.preprocess_batch(style).cuda()  # (1, H, W, C)
    utils.tensor_save_bgrimage(style[0].detach(),
                               os.path.join(args.save_model_dir, 'train_style.jpg'), True)
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    for e in range(args.epochs):
        train_loader.dataset.reset()
        agg_content_loss = 0.
        agg_style_loss = 0.
        iters = 0
        for batch_id, (x, _) in enumerate(train_loader):
            if x.size(0) != args.batch_size:
                print("=> Skip incomplete batch")
                continue
            iters += 1
            optimizer.zero_grad()
            x = utils.preprocess_batch(x).cuda()
            y = transformer(x)
            if (batch_id + 1) % 1000 == 0:
                idx = (batch_id + 1) // 1000
                utils.tensor_save_bgrimage(
                    y.data[0], os.path.join(args.save_model_dir, "out_%d.png" % idx), True)
                utils.tensor_save_bgrimage(
                    x.data[0], os.path.join(args.save_model_dir, "in_%d.png" % idx), True)
            y = utils.subtract_imagenet_mean_batch(y)
            x = utils.subtract_imagenet_mean_batch(x)
            features_y = vgg(y)
            features_x = vgg(center_crop(x, y.size(2), y.size(3)))

            # content target
            f_x = features_x[2].detach()
            # content
            f_y = features_y[2]
            content_loss = args.content_weight * mse_loss(f_y, f_x)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = gram_style[m]
                gram_y = utils.gram_matrix(features_y[m])
                batch_style_loss = 0
                for n in range(gram_y.shape[0]):
                    batch_style_loss += args.style_weight * mse_loss(gram_y[n], gram_s[0])
                style_loss += batch_style_loss / gram_y.shape[0]

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data
            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), e + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters, agg_style_loss / iters,
                (agg_content_loss + agg_style_loss) / iters)
            print(mesg)
            agg_content_loss = agg_style_loss = 0.0
            iters = 0

        # save model
        save_model_filename = "epoch_" + str(e) + "_" + \
            str(args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir, save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)
        print("\nDone, trained model saved at", save_model_path)
def optimize(args):
    content_image = utils.tensor_load_grayimage(args.content_image, size=args.content_size)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, requires_grad=False)
    content_image = utils.subtract_imagenet_mean_batch_gray(content_image)

    style_image = utils.tensor_load_rgbimage(args.style_image, size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image), requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # generate the vector field that we want to stylize
    # size = args.content_size
    # vectors = np.zeros((size, size, 2), dtype=np.float32)
    # vortex_spacing = 0.5
    # extra_factor = 2.
    #
    # a = np.array([1, 0]) * vortex_spacing
    # b = np.array([np.cos(np.pi / 3), np.sin(np.pi / 3)]) * vortex_spacing
    # rnv = int(2 * extra_factor / vortex_spacing)
    # vortices = [n * a + m * b for n in range(-rnv, rnv) for m in range(-rnv, rnv)]
    # vortices = [(x, y) for (x, y) in vortices
    #             if -extra_factor < x < extra_factor and -extra_factor < y < extra_factor]
    #
    # xs = np.linspace(-1, 1, size).astype(np.float32)[None, :]
    # ys = np.linspace(-1, 1, size).astype(np.float32)[:, None]
    #
    # for (x, y) in vortices:
    #     rsq = (xs - x) ** 2 + (ys - y) ** 2
    #     vectors[..., 0] += (ys - y) / rsq
    #     vectors[..., 1] += -(xs - x) / rsq
    #
    # for y in range(size):
    #     for x in range(size):
    #         angles[y, x] = math.atan(vectors[y, x, 1] / vectors[y, x, 0]) * 180 / math.pi
    # for y in range(size):
    #     for x in range(size):
    #         xx = float(x - size / 2)
    #         yy = float(y - size / 2)
    #         rsq = xx ** 2 + yy ** 2
    #         if rsq == 0:
    #             vectors[y, x, 0] = 0
    #             vectors[y, x, 1] = 0
    #         else:
    #             vectors[y, x, 0] = -yy / rsq
    #             vectors[y, x, 1] = xx / rsq
    # f = h5py.File("../datasets/fake/vector_fields/cat_test3.h5", 'r')
    # a_group_key = list(f.keys())[0]
    # vectors = f[a_group_key][:]
    # vectors = utils.tensor_load_vector_field(vectors)
    # vectors = Variable(vectors, requires_grad=False)

    # load the pre-trained vgg-16 and extract features
    vgg = Vgg16()
    # utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    features_style = vgg(style_image)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    # load the transformer net and extract features
    transformer_phi1 = TransformerNet()
    transformer_phi1.load_state_dict(torch.load(args.transformer_model_phi1_path))
    if args.cuda:
        content_image = content_image.cuda()
        transformer_phi1.cuda()
    vectors = transformer_phi1(content_image)
    vectors = Variable(vectors.data, requires_grad=False)

    # init optimizer
    content_image_size = content_image.data.size()
    output_size = np.asarray(content_image_size)
    output_size[1] = 3
    output_size = torch.Size(output_size)
    output = Variable(torch.randn(output_size, device="cuda"), requires_grad=True)
    optimizer = Adam([output], lr=args.lr)

    mse_loss = torch.nn.MSELoss()
    cosine_loss = torch.nn.CosineEmbeddingLoss()
    # label = torch.ones(1, 1, args.content_size, args.content_size)
    label = torch.ones(1, 128, 128, 128)
    if args.cuda:
        label = label.cuda()

    # optimize the images
    transformer_phi2 = TransformerNet()
    transformer_phi2.load_state_dict(torch.load(args.transformer_model_phi2_path))
    if args.cuda:
        transformer_phi2.cuda()

    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()
        transformer_input = utils.gray_bgr_batch(output)
        transformer_y = transformer_phi2(transformer_input)
        content_loss = args.content_weight * cosine_loss(vectors, transformer_y, label)
        # content_loss = args.content_weight * mse_loss(vectors, transformer_y)

        vgg_input = output
        features_y = vgg(vgg_input)
        style_loss = 0
        for m in range(len(features_y)):
            gram_y = utils.gram_matrix(features_y[m])
            gram_s = Variable(gram_style[m].data, requires_grad=False)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()
        tbar.set_description(str(total_loss.data.cpu().numpy().item()))

        if (e + 1) % args.log_interval == 0:
            print("iter: %d content_loss: %f style_loss %f"
                  % (e, content_loss.item(), style_loss.item()))

    # save the image
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    transform = transforms.Compose([
        transforms.Scale(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    transformer = TransformerNet()
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(requires_grad=False)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1)

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        style = style.cuda()

    style_v = Variable(style)
    style_v = utils.normalize_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            x = Variable(x)
            if args.cuda:
                x = x.cuda()

            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2)

            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1)
                )
                print(mesg)

            if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval()
                if args.cuda:
                    transformer.cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                if args.cuda:
                    transformer.cuda()
                transformer.train()

    # save model
    transformer.eval()
    if args.cuda:
        transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
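Several of these loops normalize both the stylized output and the input with utils.normalize_batch before feeding them to VGG. The helper is not shown in this section; a minimal sketch of the usual implementation (ImageNet statistics applied to a batch in the 0-255 range), following the upstream fast-neural-style example and assuming PyTorch >= 0.4 for Tensor.new_tensor:

import torch

def normalize_batch(batch):
    # Normalize a (b, 3, h, w) batch of 0-255 images with the ImageNet
    # mean and std, which is what the VGG loss network expects.
    mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
    std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
    batch = batch.div_(255.0)
    return (batch - mean) / std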
import cv2
import numpy as np
import torch
from imutils import paths
from transformer_net import TransformerNet
from PIL import Image
from torchvision import transforms

model = TransformerNet()
model.load_state_dict(torch.load('checkpoints/GodBearer.pth'))
model.cuda()
model.eval()

trm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x * 255)
])

cap = cv2.VideoCapture(0)
while True:
    success, img = cap.read()
    img = Image.fromarray(img).resize((512, 512))
    img = np.array(img)
    cv2.imshow("before", img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img).resize((256, 256))
    img = trm(img).cuda()
    t_img = model(img.unsqueeze(0)).squeeze(0).cpu()
    t_img /= 255
    # The original snippet ends here; presumably the stylized frame is
    # converted back to a BGR numpy image and displayed, e.g.:
    # out = cv2.cvtColor(t_img.clamp(0, 1).permute(1, 2, 0).detach().numpy(), cv2.COLOR_RGB2BGR)
    # cv2.imshow("after", out)
    # if cv2.waitKey(1) & 0xFF == ord('q'):
    #     break
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    # device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')

    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        # utils.RGB2LAB(),
        transforms.ToTensor(),
        # utils.LAB2Tensor(),
    ])
    pert_transform = transforms.Compose([utils.ColorPerturb()])
    trainset = utils.FlatImageFolder(args.dataset, transform, pert_transform)
    trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True,
                             pin_memory=True, num_workers=4)

    model = TransformerNet()
    if args.gpus is not None:
        model = nn.DataParallel(model, device_ids=args.gpus)
    else:
        model = nn.DataParallel(model)
    if args.resume:
        state_dict = torch.load(args.resume)
        model.load_state_dict(state_dict)
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    criterion = nn.MSELoss()

    start_time = datetime.now()
    for e in range(args.epochs):
        model.train()
        count = 0
        acc_loss = 0.0
        for batchi, (pert_img, ori_img) in enumerate(trainloader):
            count += len(pert_img)
            if args.cuda:
                pert_img = pert_img.cuda(non_blocking=True)
                ori_img = ori_img.cuda(non_blocking=True)
            optimizer.zero_grad()
            rec_img = model(pert_img)
            loss = criterion(rec_img, ori_img)
            loss.backward()
            optimizer.step()
            acc_loss += loss.item()
            if (batchi + 1) % args.log_interval == 0:
                mesg = '{}\tEpoch {}: [{}/{}]\ttotal loss: {:.6f}'.format(
                    time.ctime(), e + 1, count, len(trainset),
                    acc_loss / args.log_interval)
                print(mesg)
                acc_loss = 0.0
        if args.checkpoint_dir and e + 1 != args.epochs:
            model.eval().cpu()
            ckpt_filename = 'ckpt_epoch_' + str(e + 1) + '.pth'
            ckpt_path = osp.join(args.checkpoint_dir, ckpt_filename)
            torch.save(model.state_dict(), ckpt_path)
            model.cuda().train()
            print('Checkpoint model at epoch %d saved' % (e + 1))

    model.eval().cpu()
    if args.save_model_name:
        model_filename = args.save_model_name
    else:
        model_filename = "epoch_" + str(args.epochs) + "_" + \
            str(time.ctime()).replace(' ', '_') + ".model"
    model_path = osp.join(args.save_model_dir, model_filename)
    torch.save(model.state_dict(), model_path)
    end_time = datetime.now()
    print('Finished training after %s, trained model saved at %s'
          % (end_time - start_time, model_path))
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 0, 'pin_memory': False}
    else:
        kwargs = {}

    training_set = np.loadtxt(args.dataset, dtype=np.float32)
    training_set_size = training_set.shape[1]
    num_batch = int(training_set_size / args.batch_size)

    transformer = TransformerNet()
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    if args.cuda:
        transformer.cuda()
        vgg.cuda()

    style = np.loadtxt(args.style_image, dtype=np.float32)
    style = style.reshape((1, 1, args.style_size_x, args.style_size_y))
    style = torch.from_numpy(style)
    style = style.repeat(args.batch_size, 3, 1, 1)
    if args.cuda:
        style = style.cuda()
    style_v = Variable(style, volatile=True)
    style_v = utils.subtract_imagenet_mean_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    # Hard data
    if args.hard_data:
        hard_data = np.loadtxt(args.hard_data_file)
        # if not isinstance(hard_data[0], list):
        #     hard_data = [hard_data]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id in range(num_batch):
            x = training_set[:, batch_id * args.batch_size:(batch_id + 1) * args.batch_size]
            n_batch = x.shape[1]
            count += n_batch
            x = x.transpose()
            x = x.reshape((n_batch, 1, args.image_size_x, args.image_size_y))
            x = torch.from_numpy(x).float()
            optimizer.zero_grad()
            x = Variable(x)
            if args.cuda:
                x = x.cuda()
            y = transformer(x)

            if args.hard_data:
                hard_data_loss = 0
                num_hard_data = 0
                for hd in hard_data:
                    hard_data_loss += args.hard_data_weight * (
                        y[:, 0, hd[1], hd[0]] - hd[2] * 255.0).norm() ** 2 / n_batch
                    num_hard_data += 1
                hard_data_loss /= num_hard_data

            y = y.repeat(1, 3, 1, 1)
            y = utils.subtract_imagenet_mean_batch(y)
            features_y = vgg(y)
            # content loss is disabled in this variant:
            # xc = Variable(x.data.clone().repeat(1, 3, 1, 1), volatile=True)
            # xc = utils.subtract_imagenet_mean_batch(xc)
            # features_xc = vgg(xc)
            # f_xc_c = Variable(features_xc[1].data, requires_grad=False)
            # content_loss = args.content_weight * mse_loss(features_y[1], f_xc_c)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = Variable(gram_style[m].data, requires_grad=False)
                gram_y = utils.gram_matrix(features_y[m])
                style_loss += args.style_weight * mse_loss(gram_y, gram_s[:n_batch, :, :])

            # total_loss = content_loss + style_loss
            total_loss = style_loss
            if args.hard_data:
                total_loss += hard_data_loss
            total_loss.backward()
            optimizer.step()

            # agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]

            if (batch_id + 1) % args.log_interval == 0:
                if args.hard_data:
                    mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\thard_data: {:.6f}\ttotal: {:.6f}".format(
                        time.ctime(), e + 1, count, num_batch,
                        agg_content_loss / (batch_id + 1),
                        agg_style_loss / (batch_id + 1),
                        hard_data_loss.data[0],
                        (agg_content_loss + agg_style_loss) / (batch_id + 1))
                else:
                    mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                        time.ctime(), e + 1, count, num_batch,
                        agg_content_loss / (batch_id + 1),
                        agg_style_loss / (batch_id + 1),
                        (agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(mesg)

    # save model
    transformer.eval()
    transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    transform = transforms.Compose([
        transforms.Scale(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    transformer = TransformerNet()
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(requires_grad=False)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1)

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        style = style.cuda()

    style_v = Variable(style)
    style_v = utils.normalize_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            x = Variable(x)
            if args.cuda:
                x = x.cuda()

            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2)

            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(mesg)
                niter = e * len(train_dataset) + batch_id
                writer.add_scalar('content loss', agg_content_loss / (batch_id + 1), niter)
                writer.add_scalar('style loss', agg_style_loss / (batch_id + 1), niter)
                writer.add_scalar('total loss',
                                  (agg_content_loss + agg_style_loss) / (batch_id + 1), niter)

            if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval()
                if args.cuda:
                    transformer.cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                if args.cuda:
                    transformer.cuda()
                transformer.train()

    # save model
    transformer.eval()
    if args.cuda:
        transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 0, 'pin_memory': False}
    else:
        kwargs = {}

    class RGB2YUV(object):
        def __call__(self, img):
            import numpy as np
            import cv2
            npimg = np.array(img)
            yuvnpimg = cv2.cvtColor(npimg, cv2.COLOR_RGB2YUV)
            pilimg = Image.fromarray(yuvnpimg)
            return pilimg

    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        RGB2YUV(),
        transforms.ToTensor(),
        # transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, **kwargs)

    transformer = TransformerNet(in_channels=1, out_channels=2)  # input: Y, predict: UV
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    # vgg = Vgg16()
    # utils.init_vgg16(args.vgg_model_dir)
    # vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    transformer = nn.DataParallel(transformer)
    if args.cuda:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA is requested, but related driver/device is not set properly.")
        transformer.cuda()

    for e in range(args.epochs):
        transformer.train()
        count = 0
        for batch_id, (imgs, _) in enumerate(train_loader):
            n_batch = len(imgs)
            count += n_batch
            optimizer.zero_grad()
            # first channel (Y)
            x = imgs[:, :1, :, :].clone()
            # second and third channels (UV)
            gt = imgs[:, 1:, :, :].clone()
            if args.cuda:
                x = x.cuda()
                gt = gt.cuda()
            y = transformer(x)
            total_loss = mse_loss(y, gt)
            total_loss.backward()
            optimizer.step()
            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    total_loss / (batch_id + 1))
                print(mesg)

    # save model
    transformer.eval()
    transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    os.makedirs(args.save_model_dir, exist_ok=True)
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def train(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    vis = utils.Visualizer(opt.env)

    # Data loading
    transform = tv.transforms.Compose([
        tv.transforms.Scale(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transform)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Transformer network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))

    # Loss network: Vgg16
    vgg = Vgg16().eval()

    # Optimizer
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Load the style image
    style = utils.get_style_data(opt.style_path)
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()

    # Gram matrices of the style image
    style_v = Variable(style, volatile=True)
    features_style = vgg(style_v)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]

    # Loss statistics
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        content_meter.reset()
        style_meter.reset()
        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):
            # Training step
            optimizer.zero_grad()
            if opt.use_gpu:
                x = x.cuda()
            x = Variable(x)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # content loss
            content_loss = opt.content_weight * F.mse_loss(features_y.relu2_2, features_x.relu2_2)

            # style loss
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            # Loss smoothing
            content_meter.add(content_loss.data[0])
            style_meter.add(style_loss.data[0])

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # Visualization
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # x and y were normalized (utils.normalize_batch), so undo that for display
                vis.img('output', (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))

        # Save the visdom state and the model
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)
def train(args):
    # With a fixed args.seed, each training run draws the same
    # random numbers as the previous run.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))  # [0, 1] to [0, 255]
    ])
    # note the order: point at the image directory; load and transform; then set the batch size
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    # TODO: in transformernet
    transformer = TransformerNet()
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    # TODO: relus in vgg16
    vgg = Vgg16(requires_grad=False)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    # style2 = utils.load_image(args.style_image2, size=args.style_size)
    style = style_transform(style)
    # style2 = style_transform(style2)
    # repeat the style tensor once per batch element
    style = style.repeat(args.batch_size, 1, 1, 1)
    # style2 = style2.repeat(args.batch_size, 1, 1, 1)

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        style = style.cuda()
        # style2 = style2.cuda()

    style_v = Variable(style)
    style_v = utils.normalize_batch(style_v)
    features_style = vgg(style_v)
    # style_v2 = Variable(style2)
    # style_v2 = utils.normalize_batch(style_v2)
    # features_style2 = vgg(style_v2)

    # the style loss is computed from Gram matrices
    gram_style = [utils.gram_matrix(y) for y in features_style]
    # gram_style2 = [utils.gram_matrix(y) for y in features_style2]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            # PyTorch accumulates gradients, so zero them for each minibatch
            optimizer.zero_grad()
            x = Variable(x)
            if args.cuda:
                x = x.cuda()

            # forward pass: y is the output of the transformer
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # TODO: the relu layer used for mse_loss could be changed
            content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2)

            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
                # style_loss += mse_loss(gm_y, gm_s2[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            # backward pass: compute the gradients for each learnable parameter
            total_loss.backward()
            # update weights
            optimizer.step()

            agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]

            if (batch_id + 1) % args.log_interval == 0:
                msg = "Epoch " + str(e + 1) + " " + str(count) + "/" + str(len(train_dataset))
                msg += " content loss : " + str(agg_content_loss / (batch_id + 1))
                msg += " style loss : " + str(agg_style_loss / (batch_id + 1))
                msg += " total loss : " + str((agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(msg)

            if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval()
                if args.cuda:
                    transformer.cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                if args.cuda:
                    transformer.cuda()
                transformer.train()

    # save model
    transformer.eval()
    if args.cuda:
        transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 0, 'pin_memory': False}
    else:
        kwargs = {}

    transform = transforms.Compose([
        transforms.Scale(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, **kwargs)

    transformer = TransformerNet()
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    if args.cuda:
        transformer.cuda()
        vgg.cuda()

    style = utils.tensor_load_rgbimage(args.style_image, size=args.style_size)
    style = style.repeat(args.batch_size, 1, 1, 1)
    style = utils.preprocess_batch(style)
    if args.cuda:
        style = style.cuda()
    style_v = Variable(style, volatile=True)
    utils.subtract_imagenet_mean_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            x = Variable(utils.preprocess_batch(x))
            if args.cuda:
                x = x.cuda()

            y = transformer(x)
            xc = Variable(x.data.clone(), volatile=True)
            utils.subtract_imagenet_mean_batch(y)
            utils.subtract_imagenet_mean_batch(xc)

            features_y = vgg(y)
            features_xc = vgg(xc)
            f_xc_c = Variable(features_xc[1].data, requires_grad=False)
            content_loss = args.content_weight * mse_loss(features_y[1], f_xc_c)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = Variable(gram_style[m].data, requires_grad=False)
                gram_y = utils.gram_matrix(features_y[m])
                style_loss += args.style_weight * mse_loss(gram_y, gram_s[:n_batch, :, :])

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(mesg)

    # save model
    transformer.eval()
    transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + \
        str(time.ctime()).replace(' ', '_') + "_" + \
        str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
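The older variants above lean on utils.subtract_imagenet_mean_batch before the VGG forward pass. Its implementation is not shown; a minimal sketch under the assumption that it removes the ImageNet mean from BGR images in the 0-255 range (the BGR ordering matching the preprocess_batch/tensor_save_bgrimage convention used here). Note some callers ignore the return value while others reassign it, so the real helper is presumably an in-place variant; this sketch returns a new tensor.

import torch

def subtract_imagenet_mean_batch(batch):
    # batch: (b, 3, h, w) BGR images in [0, 255]; values below are the
    # standard ImageNet channel means in BGR order (an assumption here).
    mean = torch.zeros_like(batch)
    mean[:, 0, :, :] = 103.939  # B
    mean[:, 1, :, :] = 116.779  # G
    mean[:, 2, :, :] = 123.680  # R
    return batch - mean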
def stylizeModel(model, content_scale=None, sleep_time=0):
    img = None
    postE = NoEffect()  # trail(10)
    preE = resize()
    style_model = TransformerNet()
    style_model.load_state_dict(fix_model(torch.load(model)))
    cam = cv2.VideoCapture(0)

    # local aliases to avoid attribute lookups in the frame loop
    preEprocess = preE.process
    utilsTensor_load_rgbimage_cam = utils.tensor_load_rgbimage_cam
    utils_preprocess_batch = utils.preprocess_batch
    utils_tensor_ret_bgrimage = utils.tensor_ret_bgrimage
    postE_process = postE.process
    cv2_resize = cv2.resize
    cv2_INTER_CUBIC = cv2.INTER_CUBIC
    cv2_imshow = cv2.imshow
    cv2_waitKey = cv2.waitKey

    style_model.cuda()
    rez = getRez()
    width1 = rez[0]
    height1 = rez[1]
    mysize = (width1, height1)

    ret_val, original = cam.read()
    original = cv2.flip(original, 1)
    height, width, channels = original.shape
    delayTest = 0

    with torch.no_grad():
        # trace once for great justice
        ret_val, original = cam.read()
        content_image = utils.tensor_load_rgbimage_cam(original, scale=content_scale)
        content_image = content_image.unsqueeze(0)
        content_image = content_image.cuda()
        content_image = utils.preprocess_batch(content_image)
        traced_net = torch.jit.trace(style_model, (content_image,))

        # now loop
        while True:
            if sleep_time:
                sleep(sleep_time)
            ret_val, original = cam.read()
            # TODO hack: crop/resize experiments, currently disabled
            # original = original[140:340, 160:480]
            # original = cv2.resize(original, (480, 640), cv2.INTER_LINEAR)
            # TODO: re-enable? make a parameter?
            # original = cv2.flip(original, 1)
            # original = preEprocess(original)
            content_image = utils.tensor_load_rgbimage_cam(original, scale=content_scale)
            content_image = content_image.unsqueeze(0)
            content_image = content_image.cuda()
            content_image = utils.preprocess_batch(content_image)
            output = traced_net(content_image)
            res = utils.tensor_ret_bgrimage(output[0])
            # res = postE_process(original, postNN)
            res = cv2_resize(res, mysize, interpolation=cv2_INTER_CUBIC)
            cv2_imshow('frame', res)
            pressed_key = cv2_waitKey(1) & 0xFF
            if pressed_key == ord('q'):
                return True
            # on space, move to the next model
            elif pressed_key == 32:
                return False
            delayTest += 1
            if delayTest % 30 == 0:
                delayTest = 0
                if mTimer.isTransition():
                    return
def train(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    # Visualization
    vis = utils.Visualizer(opt.env)

    # Data loading
    transform = tv.transforms.Compose([
        # resize the input PIL.Image so that its shorter side equals `image_size`
        tv.transforms.Scale(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        # to [0, 1]
        tv.transforms.ToTensor(),
        # to [0, 255]
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    # wrap the dataset and apply the transform
    dataset = tv.datasets.ImageFolder(opt.data_root, transform)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Transformer network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))

    # Loss network: Vgg16, set to inference mode
    vgg = Vgg16().eval()

    # Optimizer (only the transformer network's parameters are trained)
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Load the style image: shape 1*c*h*w, values roughly in -2~2 (preset normalization)
    style = utils.get_style_data(opt.style_path)
    # visualize the style image: map -2~2 back to 0~1
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()

    # Gram matrices of the style image
    style_v = Variable(style, volatile=True)
    # outputs of the four intermediate VGG layers
    # (compared later against the same layers for the input image)
    features_style = vgg(style_v)
    # gram_matrix: input b,c,h,w -> output b,c,c (one Gram matrix per layer)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]

    # Loss meters for visualization (average loss over all batches of an epoch)
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        # reset the meters
        content_meter.reset()
        style_meter.reset()
        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):
            # Training step
            optimizer.zero_grad()
            if opt.use_gpu:
                x = x.cuda()
            # x is the real input image
            x = Variable(x)
            # y is the style-transferred prediction
            y = transformer(x)
            # input:  b, ch, h, w, values 0~255
            # output: b, ch, h, w, values -2~2
            # map both x and y from 0~255 to -2~2
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            # features from the four intermediate layers
            features_y = vgg(y)
            features_x = vgg(x)

            # Content loss: prediction and original are compared only at relu2_2,
            # weighted by content_weight; mse_loss is the mean squared error
            content_loss = opt.content_weight * F.mse_loss(
                features_y.relu2_2, features_x.relu2_2)

            # Style loss: sum of the MSE over the four layers.
            # features_y: the prediction's four feature maps;
            # gram_style: Gram matrices of the style image's four feature maps.
            # zip pairs up the corresponding elements of the two sequences.
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                # Gram matrix of the prediction's features
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            # total loss = style loss + content loss
            total_loss = content_loss + style_loss
            # backpropagation
            total_loss.backward()
            # parameter update
            optimizer.step()

            # Loss smoothing: feed the meters so the loss curve can be visualized
            content_meter.add(content_loss.data[0])
            style_meter.add(style_loss.data[0])

            # visualize every plot_every forward passes
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # x and y were normalized (utils.normalize_batch),
                # so map them from [-2, 2] back to [0, 1] for display
                vis.img('output', (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))

        # save the visdom state and the model after each epoch
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 0, 'pin_memory': False}
    else:
        kwargs = {}

    transform = transforms.Compose([transforms.Scale(args.image_size),
                                    transforms.CenterCrop(args.image_size),
                                    transforms.ToTensor(),
                                    transforms.Lambda(lambda x: x.mul(255))])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, **kwargs)

    transformer = TransformerNet()
    if args.premodel != "":
        transformer.load_state_dict(torch.load(args.premodel))
        print("load pretrained model: " + args.premodel)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))

    if args.cuda:
        transformer.cuda()
        vgg.cuda()

    style = utils.tensor_load_rgbimage(args.style_image, size=args.style_size)
    style = style.repeat(args.batch_size, 1, 1, 1)
    style = utils.preprocess_batch(style)
    if args.cuda:
        style = style.cuda()
    style_v = Variable(style, volatile=True)
    style_v = utils.subtract_imagenet_mean_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    hori = 0
    writer = SummaryWriter(args.logdir, comment=args.logdir)
    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        agg_cate_loss = 0.
        agg_cam_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            x = Variable(utils.preprocess_batch(x))
            if args.cuda:
                x = x.cuda()

            y = transformer(x)
            xc = Variable(x.data.clone(), volatile=True)
            # print(y.size())  # (4L, 3L, 224L, 224L)

            # calculate the focus (CAM) loss and the category loss
            y_cam = utils.depreprocess_batch(y)
            y_cam = utils.subtract_mean_std_batch(y_cam)
            xc_cam = utils.depreprocess_batch(xc)
            xc_cam = utils.subtract_mean_std_batch(xc_cam)
            del features_blobs[:]
            logit_x = net(xc_cam)
            logit_y = net(y_cam)
            label = []
            cam_loss = 0
            for i in range(len(xc_cam)):
                h_x = F.softmax(logit_x[i])
                probs_x, idx_x = h_x.data.sort(0, True)
                label.append(idx_x[0])
                h_y = F.softmax(logit_y[i])
                probs_y, idx_y = h_y.data.sort(0, True)
                x_cam_i = returnCAM(features_blobs[0][i], weight_softmax, idx_x[0])
                x_cam_i = Variable(x_cam_i.data, requires_grad=False)
                y_cam_i = returnCAM(features_blobs[1][i], weight_softmax, idx_y[0])
                cam_loss += mse_loss(y_cam_i, x_cam_i)
            # the focus loss
            cam_loss *= 80
            # the category loss
            label = Variable(torch.LongTensor(label), requires_grad=False).cuda()
            cate_loss = 10000 * torch.nn.CrossEntropyLoss()(logit_y, label)

            y = utils.subtract_imagenet_mean_batch(y)
            xc = utils.subtract_imagenet_mean_batch(xc)
            features_y = vgg(y)
            features_xc = vgg(xc)

            # f_xc_c = Variable(features_xc[1].data, requires_grad=False)
            # content_loss = args.content_weight * mse_loss(features_y[1], f_xc_c)
            f_xc_c = Variable(features_xc[2].data, requires_grad=False)
            content_loss = args.content_weight * mse_loss(features_y[2], f_xc_c)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = Variable(gram_style[m].data, requires_grad=False)
                gram_y = utils.gram_matrix(features_y[m])
                style_loss += args.style_weight * mse_loss(gram_y, gram_s[:n_batch, :, :])

            # add up the four losses and backpropagate
            total_loss = style_loss + content_loss + cam_loss + cate_loss
            total_loss.backward()
            optimizer.step()

            # bookkeeping for display
            agg_content_loss += content_loss.data[0]
            agg_style_loss += style_loss.data[0]
            agg_cate_loss += cate_loss.data[0]
            agg_cam_loss += cam_loss.data[0]
            writer.add_scalar("Loss_Cont", agg_content_loss / (batch_id + 1), hori)
            writer.add_scalar("Loss_Style", agg_style_loss / (batch_id + 1), hori)
            writer.add_scalar("Loss_CAM", agg_cam_loss / (batch_id + 1), hori)
            writer.add_scalar("Loss_Cate", agg_cate_loss / (batch_id + 1), hori)
            hori += 1

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{} Epoch {}: [{}/{}] content:{:.2f} style:{:.2f} cate:{:.2f} cam:{:.2f} total:{:.2f}".format(
                    time.strftime("%a %H:%M:%S"), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    agg_cate_loss / (batch_id + 1),
                    agg_cam_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss + agg_cate_loss + agg_cam_loss) / (batch_id + 1))
                print(mesg)

            if (batch_id + 1) % 2500 == 0:
                # periodic checkpoint
                transformer.eval()
                transformer.cpu()
                save_model_filename = "epoch_" + str(e + 1) + "_" + str(time.ctime()).replace(' ', '_') + \
                    "_" + str(args.content_weight) + "_" + str(args.style_weight) + ".model"
                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
                torch.save(transformer.state_dict(), save_model_path)
                transformer.cuda()
                transformer.train()
                print("saved at", count)

    # save the final model
    transformer.eval()
    transformer.cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + \
        "_" + str(args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    writer.close()
    print("\nDone, trained model saved at", save_model_path)
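# The focus loss above compares class activation maps (CAMs) of the content
# image and the stylized output, but returnCAM is not defined in this section.
# Below is a minimal sketch of the usual CAM computation (a weighted sum of
# the final conv features, following Zhou et al.); the normalization step and
# tensor shapes are assumptions.
import torch

def returnCAM_sketch(feature_conv, weight_softmax, class_idx):
    # feature_conv: (c, h, w) final conv features for one image
    # weight_softmax: (num_classes, c) classifier weights
    c, h, w = feature_conv.shape
    cam = torch.matmul(weight_softmax[class_idx], feature_conv.view(c, h * w))
    cam = cam.view(h, w)
    # rescale to 0..1 so maps from different images are comparable
    return (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)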
def stylize(args):
    content_image = utils.load_image(args.content_image, scale=args.content_scale)
    content_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)
    if args.cuda:
        content_image = content_image.cuda()
    content_image = Variable(content_image, volatile=True)
    style_model = TransformerNet()
    style_model.load_state_dict(torch.load(args.model))
    if args.cuda:
        style_model.cuda()
    output = style_model(content_image)
    if args.cuda:
        output = output.cpu()
    output_data = output.data[0]
    utils.save_image(args.output_image, output_data)


def main():
    main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
    subparsers = main_arg_parser.add_subparsers(title="subcommands", dest="subcommand")
    train_arg_parser = subparsers.add_parser("train", help="parser for training arguments")
    train_arg_parser.add_argument("--epochs", type=int, default=2,
                                  help="number of training epochs, default is 2")
    train_arg_parser.add_argument("--batch_size", type=int, default=4,
def train(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    vis = utils.Visualizer(opt.env)

    # data loading
    transform = tv.transforms.Compose([
        tv.transforms.Scale(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transform)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # transformer network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))

    # loss network: Vgg16
    vgg = Vgg16().eval()

    # optimizer
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # style image data
    style = utils.get_style_data(opt.style_path)
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()

    # gram matrices of the style image
    style_v = Variable(style, volatile=True)
    features_style = vgg(style_v)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]

    # loss meters
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        content_meter.reset()
        style_meter.reset()
        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):
            # training step
            optimizer.zero_grad()
            if opt.use_gpu:
                x = x.cuda()
            x = Variable(x)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # content loss
            content_loss = opt.content_weight * F.mse_loss(
                features_y.relu2_2, features_x.relu2_2)

            # style loss
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            # loss smoothing
            content_meter.add(content_loss.data[0])
            style_meter.add(style_loss.data[0])

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # visualization
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # x and y were standardized by utils.normalize_batch,
                # so map them back to [0, 1]
                vis.img('output', (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))

        # save the visdom state and the model
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)
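# The -2..2 range that the visualization code undoes with `* 0.225 + 0.45`
# comes from utils.normalize_batch, which maps a 0-255 batch through the
# ImageNet mean and standard deviation. A sketch assuming the standard
# ImageNet statistics follows; the repo's actual helper may differ, and this
# is written against a newer PyTorch than the Variable-era code above.
import torch

def normalize_batch_sketch(batch):
    # batch: (b, 3, h, w) in 0..255
    mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(batch.device)
    std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(batch.device)
    # scale to 0..1, then standardize; the result lies roughly in -2..2
    return (batch / 255.0 - mean) / std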
def save_debug_image(tensor_orig, tensor_transformed, filename):
    assert tensor_orig.size() == tensor_transformed.size()
    result = Image.fromarray(
        recover_image(tensor_transformed.cpu().numpy())[0])
    orig = Image.fromarray(recover_image(tensor_orig.cpu().numpy())[0])
    # paste the original and the stylized result side by side with a 5px gap
    new_im = Image.new('RGB', (result.size[0] * 2 + 5, result.size[1]))
    new_im.paste(orig, (0, 0))
    new_im.paste(result, (result.size[0] + 5, 0))
    new_im.save(filename)


transformer = TransformerNet()
mse_loss = torch.nn.MSELoss()
# l1_loss = torch.nn.L1Loss()
if torch.cuda.is_available():
    transformer.cuda()

CONTENT_WEIGHT = 1e4
STYLE_WEIGHT = 1e10
LOG_INTERVAL = 200
REGULARIZATION = 1e-7
LR = 1e-4

optimizer = Adam(transformer.parameters(), LR)
transformer.train()
for epoch in range(3):
    agg_content_loss = 0.
    agg_style_loss = 0.
    agg_reg_loss = 0.
    count = 0
    for batch_id, (x, _) in tqdm_notebook(enumerate(train_loader),
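# The REGULARIZATION = 1e-7 constant and the agg_reg_loss accumulator above
# suggest a total-variation term alongside content and style, but the loop is
# truncated before it appears. The usual formulation is sketched below as a
# guess, not as this notebook's confirmed code.
import torch

def tv_regularization_sketch(y):
    # total variation: L1 difference between neighboring pixels,
    # which encourages spatially smooth outputs
    return (torch.sum(torch.abs(y[:, :, 1:, :] - y[:, :, :-1, :])) +
            torch.sum(torch.abs(y[:, :, :, 1:] - y[:, :, :, :-1])))

# inside the training loop it would be weighted roughly like:
# reg_loss = REGULARIZATION * tv_regularization_sketch(y)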
def load_net(name):
    net = TransformerNet()
    net.load_state_dict(torch.load('trained_models/' + name))
    net.cuda()
    return net
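# Quick usage sketch for load_net. The file name and content_image below are
# hypothetical; since load_net calls .cuda() unconditionally, a CPU-only
# machine would also need torch.load(..., map_location='cpu') inside load_net.
net = load_net('mosaic.model')  # loads trained_models/mosaic.model
net.eval()
with torch.no_grad():
    styled = net(content_image)  # content_image: (1, 3, h, w) tensor on the GPU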