def init_model(self, hparams):
    if self.config.exp.model == 'xgboost':
        model = XGBoost(hparams)
    elif self.config.exp.model == 'lstm':
        model = LSTM(hparams)
    elif self.config.exp.model == 'transformer':
        model = TransformerNet(hparams)
    else:
        raise ValueError(f"Unknown model type: {self.config.exp.model}")
    return model
def stylize():
    net = TransformerNet()
    net.load_state_dict(torch.load(STYLE_TRANSFORM_PATH))
    net = net.to(device)
    with torch.no_grad():
        while True:
            torch.cuda.empty_cache()
            print("Stylize Image~ Press Ctrl+C and Enter to close the program")
            content_image_path = input("Enter the image path: ")
            content_image = cv2.imread(content_image_path)
            content_tensor = itot(content_image)
            generated_tensor = net(content_tensor)
            generated_image = ttoi(generated_tensor)
            generated_image = cv2.cvtColor(generated_image, cv2.COLOR_BGR2RGB)
            plt.imshow(generated_image)
            plt.show()
class Inference(): """ Inference class that wrapped styleTransfer classes. """ def __init__(self): self.device = torch.device( "cuda" if torch.cuda.is_available() else "cpu") self.mosaic_WeightsPath = "Fast-Neural-Style-Transfer/weights/mosaic_10000.pth" self.cuphead_WeightsPath = mospath + "Fast-Neural-Style-Transfer/weights/cuphead_10000.pth" self.starry_night_WeightsPath = mospath + "Fast-Neural-Style-Transfer/weights/starry_night_10000.pth" # instance of style Transfer model. self.transformer = TransformerNet().to(self.device) self.transform = style_transform() def load_model(self, modelType): """ Args: modelType: string that contains name of the model to switch to it. """ if modelType.lower() == "mosaic": self.transformer.load_state_dict(torch.load( self.mosaic_WeightsPath, map_location=torch.device('cpu'))) elif modelType.lower() == "cuphead": self.transformer.load_state_dict(torch.load( self.cuphead_WeightsPath, map_location=torch.device('cpu'))) else: self.transformer.load_state_dict(torch.load( self.starry_night_WeightsPath, map_location=torch.device('cpu'))) self.transformer.eval() def predict(self, modelType, Base64Img): img = base64.b64decode(Base64Img) image = Image.open(BytesIO(img)).convert('RGB') self.load_model(modelType) image_tensor = Variable(self.transform(image)).to(self.device) image_tensor = image_tensor.unsqueeze(0) # Stylize image with torch.no_grad(): stylized_image = denormalize(self.transformer(image_tensor)).cpu() imageBytes = save_image( stylized_image, "/home/mostafax/Desktop/Style-Transfer-App/Fast-Neural-Style-Transfer/images/result.jpeg") my_string = base64.b64encode(imageBytes) return my_string
def stylize(args):
    device = torch.device("cuda" if args.cuda else "cpu")

    style_model = TransformerNet()
    state_dict = torch.load(args.model)
    style_model.load_state_dict(state_dict)
    style_model.to(device)

    img_list = os.listdir(args.content_dir)
    img_list.sort()

    for img in tqdm(img_list):
        img_path = args.content_dir + img
        content_org = utils.load_image(img_path, scale=args.content_scale)
        content_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ])
        content_image = content_transform(content_org)
        content_image = content_image.unsqueeze(0).to(device)

        with torch.no_grad():
            output = style_model(content_image).cpu()

        output = output[0]
        output = output.clone().clamp(0, 255).numpy()
        output = output.transpose(1, 2, 0).astype("uint8")
        output = Image.fromarray(output)
        if args.keep_colors:
            output = utils.original_colors(content_org, output)
        output.save(args.output_dir + img)
def stylize(**kwargs):
    opt.parse(**kwargs)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # data
    # Load a single image for inference.
    content_image = tv.datasets.folder.default_loader(opt.content_path)
    # Note: unlike training, there is no Resize or CenterCrop here; the fully
    # convolutional TransformerNet accepts inputs of arbitrary size.
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0).to(device).detach()

    # model
    style_model = TransformerNet().eval()
    style_model.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))
    style_model.to(device)

    # stylize
    output = style_model(content_image)  # output values are in the 0-255 range
    output_data = (output.cpu().data[0] / 255).clamp(min=0, max=1)
    tv.utils.save_image(output_data, opt.result_path)
def test():
    image_path = './image_folder'
    ckpt = './path_to_ckpt'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    transform = test_transform()
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(torch.load(ckpt))
    transformer.eval()

    image = torch.Tensor(transform(Image.open(image_path))).to(device)
    image = image.unsqueeze(0)

    with torch.no_grad():
        style_trans_img = denormalize(transformer(image)).cpu()

    fn = image_path.split("/")[-1]
    save_image(style_trans_img, f"images/outputs/stylized-{fn}")
def style_image(image_path, model_path):
    image = Image.open(image_path)
    width, height = image.size
    alpha = image.convert('RGBA').split()[-1]

    # @TODO - import the mean color...
    mean_color = Image.new("RGB", image.size, (124, 116, 103))
    rgb_image = image.convert('RGB')
    rgb_image.paste(mean_color, mask=invert(alpha))

    cuda_available = torch.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda_available else "cpu")
    model = torch.load(model_path, map_location=device)

    image_filename = os.path.basename(image_path)
    model_filename = os.path.basename(model_path)
    model_name = os.path.splitext(model_filename)[0]
    os.makedirs(f"images/outputs/{model_name}", exist_ok=True)

    transform = style_transform()

    # Define model and load model checkpoint
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(model)
    transformer.eval()

    # Prepare input
    image_tensor = Variable(transform(rgb_image)).to(device)
    image_tensor = image_tensor.unsqueeze(0)

    # Stylize image
    with torch.no_grad():
        output_tensor = depro(transformer(image_tensor))

    stylized_image = F.to_pil_image(output_tensor) \
        .convert('RGBA') \
        .crop((0, 0, width, height))
    stylized_image.putalpha(alpha)
    stylized_image.save(f"images/outputs/{model_name}/{image_filename}", 'PNG')
def transfer_img(usr_img_path, style_model_path, new_img_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define model and load model checkpoint
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(torch.load(style_model_path))
    transformer.eval()

    # Prepare input
    transform = style_transform()
    image_tensor = transform(Image.open(usr_img_path)).to(device)
    image_tensor = image_tensor.unsqueeze(0)

    # Stylize image
    with torch.no_grad():
        stylized_image = denormalize(transformer(image_tensor)).cpu()

    # Save image
    save_image(stylized_image, new_img_path)
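A minimal call sketch for transfer_img; the content image, checkpoint, and output paths below are placeholders, not files referenced elsewhere in these snippets.

transfer_img("content.jpg", "checkpoints/mosaic_10000.pth", "images/outputs/stylized-content.jpg")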
def exportStyleTransfer(image_path, style):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    styles = ['9', '32', '51', '55', '56', '58', '108', '140',
              '150', '153', '154', '155', '156']
    checkpoint_model = ('/home/KLTN_TheFaceOfArtFaceParsing/Updates/StyleTransfer/models/'
                        + styles[style - 1] + '_4000.pth')

    transform = style_transform()

    # Define model and load model checkpoint
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(torch.load(checkpoint_model))
    transformer.eval()

    # Prepare input
    image_tensor = Variable(transform(Image.open(image_path))).to(device)
    image_tensor = image_tensor.unsqueeze(0)

    # Stylize image
    with torch.no_grad():
        stylized_image = denormalize(transformer(image_tensor)).cpu()

    # Save image
    save_image(stylized_image, '/home/KLTN_TheFaceOfArtFaceParsing/result.jpg')
    return db_op_wrapper


####################################################################################
# LOAD AND COMPILE MODEL
####################################################################################
size = 640, 640
transform = style_transform()

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Without a GPU, video streaming is too slow.
device = torch.device("cpu")

# Define model and load model checkpoint
transformer = TransformerNet().to(device)
transformer.load_state_dict(
    torch.load("models/_active_model.pth", map_location='cpu'))
transformer.eval()


####################################################################################
# CAMERA CLASS (with integrated messaging queue)
####################################################################################
class Camera():
    def __init__(self):
        self.to_process = []
        self.to_output = []
        thread = threading.Thread(target=self.keep_processing, args=())
                    type=int, default=2500, help="Batches between saving model")
args = parser.parse_args()

style_name = args.style_image.split("\\")[-1].split(".")[0]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create a DataLoader for the training dataset
train_dataset = datasets.ImageFolder(args.dataset_path, train_transform(args.image_size))
dataloader = DataLoader(train_dataset, batch_size=args.batch_size)

# Define the two networks
transformer = TransformerNet().to(device)
vgg = VGG16(requires_grad=False).to(device)

# Define the optimizer and the loss
optimizer = Adam(transformer.parameters(), args.lr)
l2_loss = torch.nn.MSELoss().to(device)

# Load the style image
style = style_transform(args.style_size)(Image.open(args.style_image))
style = style.repeat(args.batch_size, 1, 1, 1).to(device)

# Extract style features
features_style = vgg(style)
gram_style = [gram_matrix(y) for y in features_style]

for epoch in range(args.epochs):
if __name__ == "__main__": device = get_device() content_image = load_image(test_image) content_transform = transforms.Compose([ transforms.ToTensor(), transforms.Lambda(lambda x: x.mul(255)) ]) content_image = content_transform(content_image) content_image = content_image.unsqueeze(0).to(device) with torch.no_grad(): style_model = TransformerNet() ckpt_model_path = os.path.join(checkpoint_dir, checkpoint_file) checkpoint = torch.load(ckpt_model_path, map_location=device) # remove saved deprecated running_* keys in InstanceNorm from the checkpoint for k in list(checkpoint.keys()): if re.search(r'in\d+\.running_(mean|var)$', k): # in200.running_var or in200.running_mean del checkpoint[k] style_model.load_state_dict(checkpoint['model_state_dict']) style_model.to(device) output = style_model(content_image).cpu() save_image(output_image, output[0])
def get_transformer(checkpoint_path: str) -> torch.nn.Module:
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(torch.load(checkpoint_path, map_location=device))
    transformer.eval()
    return transformer
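A usage sketch for get_transformer; it assumes the same module-level device, style_transform, and denormalize helpers used in the surrounding snippets, and the checkpoint and image paths are placeholders.

# Load a trained transformer and stylize a single content image.
transformer = get_transformer("checkpoints/mosaic_10000.pth")
image_tensor = style_transform()(Image.open("content.jpg")).unsqueeze(0).to(device)
with torch.no_grad():
    stylized = denormalize(transformer(image_tensor)).cpu()
save_image(stylized, "images/outputs/stylized-content.jpg")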
def train(**kwargs):
    # step 1: config
    opt.parse(**kwargs)
    vis = Visualizer(opt.env)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # step 2: data (dataloader and style image)
    # Preprocessing differs from earlier chapters: instead of normalizing, a
    # lambda multiplies pixel values by 255 (this choice deserves an explanation).
    # There are two kinds of images: content images (many, used for training)
    # and a single style image, used only in the loss.
    transforms = T.Compose([
        T.Resize(opt.image_size),
        T.CenterCrop(opt.image_size),
        T.ToTensor(),
        T.Lambda(lambda x: x * 255)
    ])
    # Images are loaded with ImageFolder, as in chapter 7, rather than a custom Dataset.
    dataset = tv.datasets.ImageFolder(opt.data_root, transform=transforms)
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True,
                            num_workers=opt.num_workers, drop_last=True)

    style_img = get_style_data(opt.style_path)  # 1*c*H*W
    style_img = style_img.to(device)
    # Roughly undo the ImageNet normalization for display (arguably unnecessary;
    # worth experimenting with).
    vis.img('style_image', (style_img.data[0] * 0.225 + 0.45).clamp(min=0, max=1))

    # step 3: models: TransformerNet and the loss network VGG16.
    # TransformerNet transforms the content images and is the only network that
    # is trained and saved; VGG16 only evaluates the perceptual loss, so its
    # parameters are frozen and it runs in eval() mode. The checkpoint is loaded
    # with a simple map_location lambda. The model is moved to the device here,
    # while the data is moved to the device inside the training loop. (Chapter 7
    # separated networks differently, e.g. fake_img = netg(noises).detach().)
    transformer_net = TransformerNet()
    if opt.model_path:
        transformer_net.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer_net.to(device)

    # step 3: criterion and optimizer
    optimizer = t.optim.Adam(transformer_net.parameters(), opt.lr)
    # The loss is computed through VGG16 and combines Gram-matrix (style) terms
    # with mean squared error (content) terms.
    vgg16 = Vgg16().eval()  # to be verified
    vgg16.to(device)
    # VGG's parameters need no gradients, although gradients still flow back
    # through it; revisit the difference between detach and requires_grad.
    for param in vgg16.parameters():
        param.requires_grad = False
    criterion = t.nn.MSELoss(reduce=True, size_average=True)

    # step 4: meters for loss statistics
    style_meter = meter.AverageValueMeter()
    content_meter = meter.AverageValueMeter()
    total_meter = meter.AverageValueMeter()

    # Precompute the Gram matrices of the style image.
    # gram_style: list over [relu1_2, relu2_2, relu3_3, relu4_3], each a b*c*c tensor.
    with t.no_grad():
        features = vgg16(style_img)
        gram_style = [gram_matrix(feature) for feature in features]

    # step 5: train
    for epoch in range(opt.epoches):
        style_meter.reset()
        content_meter.reset()

        # step 5.1: train over batches
        for ii, (data, _) in tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            # No Variable() wrapping here, unlike earlier chapters: since
            # PyTorch 0.4, tensors and Variables are merged. See
            # https://mp.weixin.qq.com/s?__biz=MzI0ODcxODk5OA==&mid=2247494701&idx=2&sn=ea8411d66038f172a2f553770adccbec&chksm=e99edfd4dee956c23c47c7bb97a31ee816eb3a0404466c1a57c12948d807c975053e38b18097&scene=21#wechat_redirect
            data = data.to(device)
            y = transformer_net(data)
            # VGG expects normalized inputs.
            data = normalize_batch(data)
            y = normalize_batch(y)

            feature_data = vgg16(data)
            feature_y = vgg16(y)  # Question: what shape do these feature outputs have?
            # step 5.2: loss: content loss and style loss
            # content loss
            # Note: the book uses relu3_3, but this code uses relu2_2.
            # https://blog.csdn.net/zhangxb35/article/details/72464152?utm_source=itdadao&utm_medium=referral
            # The MSE is a per-element loss: sum over all N*b*h*w elements and
            # divide by N*b*h*w; SGD already averages the batch loss before
            # backpropagating.
            content_loss = opt.content_weight * criterion(feature_y.relu2_2, feature_data.relu2_2)

            # style loss over relu1_2, relu2_2, relu3_3, relu4_3:
            # compute the Gram matrix of each generated feature map.
            style_loss = 0
            # Iterating over a tensor unpacks only its outermost dimension.
            # ft_y: b*c*h*w, gm_s: 1*c*h*w
            for ft_y, gm_s in zip(feature_y, gram_style):
                gram_y = gram_matrix(ft_y)
                style_loss += criterion(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # import ipdb; ipdb.set_trace()
            # Get scalar values with tensor.item() / tensor.tolist().
            content_meter.add(content_loss.item())
            style_meter.add(style_loss.item())
            total_meter.add(total_loss.item())

            # step 5.3: visualize
            if (ii + 1) % opt.print_freq == 0 and opt.vis:
                # Drop into the debugger if the debug file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                vis.plot('total_loss', total_meter.value()[0])
                # data and y have been normalized to roughly -2..2, so map them
                # back to 0..1 for display.
                vis.img('input', (data.data * 0.225 + 0.45)[0].clamp(min=0, max=1))
                vis.img('output', (y.data * 0.225 + 0.45)[0].clamp(min=0, max=1))

        # step 5.4: save, validate and visualize
        if (epoch + 1) % opt.save_every == 0:
            t.save(transformer_net.state_dict(), 'checkpoints/%s_style.pth' % epoch)
            # Chapter 7 saved images with
            # tv.utils.save_image(fix_fake_imgs, '%s/%s.png' % (opt.img_save_path, epoch), normalize=True, range=(-1, 1))
            vis.save([opt.env])
def train(args):
    device = torch.device("cuda" if args.cuda else "cpu")

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data_train = load_data(args)
    iterator = data_train

    transformer = TransformerNet().to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(weights=args.vgg16, requires_grad=False).to(device)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)

    features_style = vgg(utils.normalize_batch(style))
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        count = 0

        if args.noise_count:
            # Preparing noise image.
            noiseimg_n = np.zeros((3, args.image_size, args.image_size), dtype=np.float32)
            for n_c in range(args.noise_count):
                x_n = random.randrange(args.image_size)
                y_n = random.randrange(args.image_size)
                noiseimg_n[0][x_n][y_n] += random.randrange(-args.noise, args.noise)
                noiseimg_n[1][x_n][y_n] += random.randrange(-args.noise, args.noise)
                noiseimg_n[2][x_n][y_n] += random.randrange(-args.noise, args.noise)
            noiseimg = torch.from_numpy(noiseimg_n)
            noiseimg = noiseimg.to(device)

        for batch_id, sample in enumerate(iterator):
            x = sample['image']
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            x = x.to(device)
            if args.noise_count:
                # Adding the noise image to the source image.
                noisy_x = x + noiseimg
                noisy_y = transformer(noisy_x)
                noisy_y = utils.normalize_batch(noisy_y)

            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            L_feat = args.lambda_feat * mse_loss(features_y.relu2_2, features_x.relu2_2)

            L_style = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                L_style += mse_loss(gm_y, gm_s[:n_batch, :, :])
            L_style *= args.lambda_style

            # Total variation regularization.
            L_tv = (torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:])) +
                    torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :])))
            L_tv *= args.lambda_tv

            if args.noise_count:
                L_pop = args.lambda_noise * F.mse_loss(y, noisy_y)
                L = L_feat + L_style + L_tv + L_pop
                print('Epoch {},{}/{}. Total loss: {}. Loss distribution: feat {}, style {}, tv {}, pop {}'
                      .format(e, batch_id, len(data_train), L.data, L_feat.data / L.data,
                              L_style.data / L.data, L_tv.data / L.data, L_pop.data / L.data))
            else:
                L = L_feat + L_style + L_tv
                print('Epoch {},{}/{}. Total loss: {}. Loss distribution: feat {}, style {}, tv {}'
                      .format(e, batch_id, len(data_train), L.data, L_feat.data / L.data,
                              L_style.data / L.data, L_tv.data / L.data))

            # Note: this overrides the total computed above, dropping the TV and
            # pop terms and re-weighting the style and feature losses.
            L = L_style * 1e10 + L_feat * 1e5
            L.backward()
            optimizer.step()

    transformer.eval().cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    print("\nDone, trained model saved at", save_model_path)
parser.add_argument("--checkpoint_model", type=str, help="Optional path to checkpoint model") parser.add_argument("--checkpoint_interval", type=int, default=10000, help="Batches between saving model") parser.add_argument("--sample_interval", type=int, default=1000, help="Batches between saving image samples") args = parser.parse_args() style_name = args.style_image.split("/")[-1].split(".")[0] os.makedirs(f"images/outputs/{style_name}-training", exist_ok=True) os.makedirs(f"checkpoints", exist_ok=True) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Create dataloader for the training data train_dataset = datasets.ImageFolder(args.dataset_path, train_transform(args.image_size)) dataloader = DataLoader(train_dataset, batch_size=args.batch_size) # Defines networks transformer = TransformerNet().to(device) vgg = VGG16(requires_grad=False).to(device) # Load checkpoint model if specified if args.checkpoint_model: transformer.load_state_dict(torch.load(args.checkpoint_model)) # Define optimizer and loss optimizer = Adam(transformer.parameters(), args.lr) l2_loss = torch.nn.MSELoss().to(device) # Load style image style = style_transform(args.style_size)(Image.open(args.style_image)) style = style.repeat(args.batch_size, 1, 1, 1).to(device) # Extract style features
help="Batches between saving image samples") args = parser.parse_args() style_name = args.style_image.split("/")[-1].split(".")[0] os.makedirs(f"images/outputs/{style_name}-training", exist_ok=True) os.makedirs(f"checkpoints", exist_ok=True) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Create dataloader for the training data train_dataset = datasets.ImageFolder(args.dataset_path, train_transform(args.image_size)) dataloader = DataLoader(train_dataset, batch_size=args.batch_size) # Defines networks transformer = TransformerNet().to(device) vgg = VGG16(requires_grad=False).to(device) # Load checkpoint model if specified if args.checkpoint_model: transformer.load_state_dict(torch.load(args.checkpoint_model)) # Define optimizer and loss optimizer = Adam(transformer.parameters(), args.lr) l2_loss = torch.nn.MSELoss().to(device) # Load style image style = style_transform(args.style_size)(Image.open(args.style_image)) style = style.repeat(args.batch_size, 1, 1, 1).to(device) # Extract style features
def train():
    parser = argparse.ArgumentParser(description='parser for style transfer')
    parser.add_argument('--dataset_path', type=str, default=r'C:\Users\Dewey\data\celeba', help='path to training dataset')
    parser.add_argument('--style_image', type=str, default='mosaic.jpg', help='path to style img')
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=4)
    parser.add_argument('--image_size', type=int, default=256, help='training image size')
    parser.add_argument('--style_img_size', type=int, default=256, help='style image size')
    parser.add_argument("--lambda_content", type=float, default=1e5, help="Weight for content loss")
    parser.add_argument("--lambda_style", type=float, default=1e10, help="Weight for style loss")
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument("--checkpoint_model", type=str, help="Optional path to checkpoint model")
    parser.add_argument("--checkpoint_interval", type=int, default=2000, help="Batches between saving model")
    parser.add_argument("--sample_interval", type=int, default=1000, help="Batches between saving image samples")
    parser.add_argument('--sample_format', type=str, default='jpg', help='sample image format')
    args = parser.parse_args()

    style_name = args.style_image.split('/')[-1].split('.')[0]
    os.makedirs(f'images/outputs/{style_name}-training', exist_ok=True)  # f-string formatting
    os.makedirs('checkpoints', exist_ok=True)

    def save_sample(batch):
        transformer.eval()
        with torch.no_grad():
            output = transformer(image_samples.to(device))
        img_grid = denormalize(torch.cat((image_samples.cpu(), output.cpu()), 2))
        save_image(img_grid, f"images/outputs/{style_name}-training/{batch}.jpg", nrow=4)
        transformer.train()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = datasets.ImageFolder(args.dataset_path, train_transform(args.image_size))
    dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)

    transformer = TransformerNet().to(device)
    vgg = VGG16(requires_grad=False).to(device)
    if args.checkpoint_model:
        transformer.load_state_dict(torch.load(args.checkpoint_model))

    optimizer = Adam(transformer.parameters(), lr=args.lr)
    l2_loss = nn.MSELoss().to(device)

    # Load style image
    style = style_transform(args.style_img_size)(Image.open(args.style_image))
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)

    # Style image features
    style_features = vgg(style)
    gram_style = [gram(x) for x in style_features]

    # Sample images for visualization
    image_samples = []
    for path in random.sample(glob.glob(f'{args.dataset_path}/*/*.{args.sample_format}'), 8):
        image_samples += [style_transform((args.image_size, args.image_size))(Image.open(path))]
    image_samples = torch.stack(image_samples)

    c_loss = 0
    s_loss = 0
    t_loss = 0
    for epoch in range(args.epochs):
        for i, (img, _) in enumerate(dataloader):
            optimizer.zero_grad()

            image_original = img.to(device)
            image_transformed = transformer(image_original)

            origin_features = vgg(image_original)
            transformed_features = vgg(image_transformed)

            content_loss = args.lambda_content * l2_loss(
                transformed_features.relu_2_2, origin_features.relu_2_2)

            style_loss = 0
            for ii, jj in zip(transformed_features, gram_style):
                gram_t_features = gram(ii)
                # Slice the style Gram matrices in case the last batch is smaller.
                style_loss += l2_loss(gram_t_features, jj[:img.size(0), :, :])
            style_loss *= args.lambda_style

            loss = content_loss + style_loss
            loss.backward()
            optimizer.step()

            c_loss += content_loss.item()
            s_loss += style_loss.item()
            t_loss += loss.item()
            print('[Epoch %d/%d] [Batch %d/%d] [Content: %.2f (%.2f) Style: %.2f (%.2f) Total: %.2f (%.2f)]' % (
                epoch + 1,
                args.epochs,
                i,
                len(train_dataset),
                content_loss.item(),
                np.mean(c_loss),
                style_loss.item(),
                np.mean(s_loss),
                loss.item(),
                np.mean(t_loss),
            ))

            batches_done = epoch * len(dataloader) + i + 1
            if batches_done % args.sample_interval == 0:
                save_sample(batches_done)
            if args.checkpoint_interval > 0 and batches_done % args.checkpoint_interval == 0:
                style_name = os.path.basename(args.style_image).split(".")[0]
                torch.save(transformer.state_dict(), f"checkpoints/{style_name}_{batches_done}.pth")
def train_new_style(style_img_path, style_model_path):
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # Basic parameter settings
    dataset_path = "datasets"  # path to the COCO 2014 dataset
    epochs = 1
    batch_size = 4
    # max_train_batch = 20000
    image_size = 256
    style_size = None
    # The three loss weights below may need tuning:
    # 1. 1e3, 1e6, 1      ep=24000
    # 2. 1e2, 1e5, 0.5    ep=18000
    # 3. 5e1, 5e4, 0.01   ep=max, lr=1e-4
    # The original paper's Lua implementation uses 1.0, 5.0, 1e-6;
    # the TensorFlow version uses 7.5 (15), 100.
    lambda_content = float(5e1)
    lambda_style = float(5e4)
    lambda_tv = float(0.01)
    lr = float(1e-4)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dataloader for the training data
    train_dataset = datasets.ImageFolder(dataset_path, train_transform(image_size))
    dataloader = DataLoader(train_dataset, batch_size=batch_size)

    # Define networks
    transformer = TransformerNet().to(device)
    vgg = VGG16(requires_grad=False).to(device)

    # Define optimizer and loss
    optimizer = Adam(transformer.parameters(), lr)
    l2_loss = torch.nn.MSELoss().to(device)

    # Load style image
    style = style_transform(style_size)(Image.open(style_img_path))
    style = style.repeat(batch_size, 1, 1, 1).to(device)

    # Extract style features
    features_style = vgg(style)
    gram_style = [gram_matrix(y) for y in features_style]

    for epoch in range(epochs):
        # epoch_metrics = {"content": [], "style": [], "total": []}
        for batch_i, (images, _) in enumerate(dataloader):
            optimizer.zero_grad()

            images_original = images.to(device)
            images_transformed = transformer(images_original)

            # Extract features
            features_original = vgg(images_original)
            features_transformed = vgg(images_transformed)

            # Compute content loss as MSE between features
            content_size = (features_transformed.relu2_2.shape[0] * features_transformed.relu2_2.shape[1] *
                            features_transformed.relu2_2.shape[2] * features_transformed.relu2_2.shape[3])
            content_loss = lambda_content * 2 * l2_loss(features_transformed.relu2_2, features_original.relu2_2)
            content_loss /= content_size

            # Compute style loss as MSE between Gram matrices
            style_loss = 0
            for ft_y, gm_s in zip(features_transformed, gram_style):
                gm_y = gram_matrix(ft_y)
                gm_size = gm_y.shape[0] * gm_y.shape[1] * gm_y.shape[2]
                style_loss += l2_loss(gm_y, gm_s[:images.size(0), :, :]) / gm_size
            style_loss *= lambda_style * 2

            # Compute total variation loss
            y_tv = l2_loss(images_transformed[:, :, 1:, :], images_transformed[:, :, :image_size - 1, :])
            x_tv = l2_loss(images_transformed[:, :, :, 1:], images_transformed[:, :, :, :image_size - 1])
            tv_loss = lambda_tv * 2 * (x_tv / image_size + y_tv / image_size) / batch_size

            total_loss = content_loss + style_loss + tv_loss
            total_loss.backward()
            optimizer.step()

    # Save trained model
    torch.save(transformer.state_dict(), style_model_path)
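A minimal invocation sketch for train_new_style; the style image and output checkpoint paths are placeholders, and the "datasets" folder above is assumed to contain the training images in an ImageFolder layout.

if __name__ == "__main__":
    train_new_style("styles/mosaic.jpg", "checkpoints/mosaic_new.pth")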
def main():
    uploaded_file = st.file_uploader(
        "Choose a picture", type=['jpg', 'png', 'webm', 'mp4', 'gif', 'jpeg'])
    if uploaded_file is not None:
        st.image(uploaded_file, width=200)

    # Collect the available pretrained models
    folder = os.path.abspath(os.getcwd())
    folder = folder + '/models'
    fnames = []
    for basename in os.listdir(folder):
        print(basename)
        fname = os.path.join(folder, basename)
        if fname.endswith('.pth'):
            fnames.append(fname)
    checkpoint = st.selectbox('Select a pretrained model', fnames)

    # parser = argparse.ArgumentParser()
    # parser.add_argument("--image_path", type=str, required=True, help="Path to image")
    # parser.add_argument("--checkpoint_model", type=str, required=True, help="Path to checkpoint model")
    # args = parser.parse_args()
    # print(args)

    os.makedirs("images/outputs", exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")

    transform = style_transform()

    try:
        # Define model and load model checkpoint
        transformer = TransformerNet().to(device)
        transformer.load_state_dict(torch.load(checkpoint))
        transformer.eval()

        # Prepare input
        image_tensor = Variable(
            transform(Image.open(uploaded_file).convert('RGB'))).to(device)
        image_tensor = image_tensor.unsqueeze(0)

        # Stylize image
        with torch.no_grad():
            stylized_image = denormalize(transformer(image_tensor)).cpu()

        # colormaps = ['Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'winter', 'winter_r']
        # colormap = st.selectbox('Select a colormap', colormaps)
        # plt.imshow(stylized_image.numpy()[0][0], cmap=colormap)
        # plt.imshow(stylized_image.numpy()[0][0], cmap='gist_rainbow')
        # img = np.squeeze(stylized_image)
        # plt.imshow(img[0])
        # plt.show()
        # st.image(img)

        # Save image
        fn = str(np.random.randint(0, 100)) + 'image.jpg'
        save_image(stylized_image, f"images/outputs/stylized-{fn}")
        st.image(f"images/outputs/stylized-{fn}")
    except:
        st.write('Choose an image')
import torch
import torchvision
from torchvision import datasets, models, transforms
from models import TransformerNet, VGG16
import coremltools as ct
import urllib
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.utils import save_image
import numpy as np

net = TransformerNet()
net.load_state_dict(
    torch.load("starry_night_10000.pth", map_location=torch.device('cpu')))
net.eval()

# Trace the model with a fixed-size example input and convert it to Core ML.
x = torch.rand(1, 3, 512, 512)
traced_model = torch.jit.trace(net, x)

model = ct.convert(traced_model,
                   inputs=[ct.ImageType(name="input_image", shape=x.shape)])
model.save("starry_night.mlmodel")
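A quick sanity check for the exported Core ML model, sketched under the assumption that coremltools is installed (prediction requires macOS) and that a local 512x512 test image named content.jpg exists; the output key name depends on how the converter named the traced graph's output, so it is looked up dynamically.

import coremltools as ct
from PIL import Image

mlmodel = ct.models.MLModel("starry_night.mlmodel")
content = Image.open("content.jpg").convert("RGB").resize((512, 512))
prediction = mlmodel.predict({"input_image": content})
# Print the output names and the shape/type of each returned value.
print({k: getattr(v, "shape", type(v)) for k, v in prediction.items()})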
import os
import sys

import torch
from torch.autograd import Variable
from torchvision.utils import save_image
from PIL import Image

# Project-local helpers (assumed module layout): the transformer network and
# the style_transform/denormalize utilities used below.
from models import TransformerNet
from utils import style_transform, denormalize

if __name__ == "__main__":
    image_path = sys.argv[1]
    model_path = sys.argv[2]

    os.makedirs("images/outputs", exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    transform = style_transform()

    # Define the transformer network and load the trained weights
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(torch.load(model_path))
    transformer.eval()

    # Prepare the content image as a tensor
    image_tensor = Variable(transform(Image.open(image_path))).to(device)
    image_tensor = image_tensor.unsqueeze(0)

    # Perform style transfer
    with torch.no_grad():
        stylized_image = denormalize(transformer(image_tensor)).cpu()

    # Save the image
    fn = image_path.split("/")[-1]
    sn = model_path.split("/")[-1]
    save_image(stylized_image, f"images/outputs/{sn}-{fn}")
def main():
    # Upload images
    uploaded_file = st.file_uploader("Choose a picture", type=['jpg', 'png'])
    # if uploaded_file is not None:
    #     st.image(uploaded_file, width=200)
    second_uploaded_file = st.file_uploader("Choose another picture", type=['jpg', 'png'])
    # if second_uploaded_file is not None:
    #     st.image(second_uploaded_file, width=200)

    try:
        image1 = Image.open(uploaded_file)
        image2 = Image.open(second_uploaded_file)
        image1_arr = np.array(image1)
        image2_arr = np.array(image2)
        print(image1_arr.shape)
        print(image2_arr.shape)

        show_file = st.empty()
        show_file1 = st.empty()
        show_file2 = st.empty()
        show_file3 = st.empty()
        show_file4 = st.empty()
        show_file5 = st.empty()
        if not uploaded_file:
            show_file.info('Please upload a file')
            return
        show_file.title('Input Images')
        show_file1.image(uploaded_file, width=100)
        show_file2.title('+')
        show_file3.image(second_uploaded_file, width=100)
        show_file4.title('=')
        show_file5.image(image1, width=300)
        # Read images to opencv
        # src_img = cv2.imencode('jpg', image1)
        # dst_img = cv2.imencode('jpg', image2)
    except:
        show_file = st.empty()
        show_file.info('Please upload a file')

    os.makedirs("images/outputs", exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    transform = style_transform()

    # Define model and load model checkpoint
    transformer = TransformerNet().to(device)
    transformer.load_state_dict(
        torch.load('/home/nick/Downloads/faceswap_app/models/starry_night_10000.pth'))
    transformer.eval()

    # Prepare input
    image_tensor = Variable(transform(Image.open(uploaded_file))).to(device)
    image_tensor = image_tensor.unsqueeze(0)

    # Stylize image
    with torch.no_grad():
        stylized_image = denormalize(transformer(image_tensor)).cpu()

    # Save image
    fn = uploaded_file.name.split("/")[-1]
    save_image(stylized_image, f"images/outputs/stylized-{fn}")
import coremltools as ct
import urllib
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision.utils import save_image
import numpy as np

# Project-local module (assumed layout), providing the transformer network.
from models import TransformerNet


def denormalize(tensors):
    """ Denormalizes image tensors using mean and std """
    for c in range(3):
        tensors[:, c].mul_(std[c]).add_(mean[c])
    return tensors


net = TransformerNet()
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])


def style(style, filename):
    input_image = Image.open(f"{filename}.jpg")
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
def index():
    if request.method == 'POST':
        f = request.files['image']
        filename = f.filename
        path = os.path.join(UPLOAD_FOLDER, filename)
        f.save(path)
        style_img = path

        a = int(Image.open(style_img).size[0])
        b = int(Image.open(style_img).size[1])

        device = torch.device("cpu")
        transform = style_transform()
        transformer = TransformerNet().to(device)

        model = request.form['style']
        if model == 'mosaic':
            transformer.load_state_dict(
                torch.load("static/model/mosaic_10000.pth", map_location=torch.device('cpu')))
            transformer.eval()
            filename = "mosaic{}".format(filename)
        elif model == 'mona':
            transformer.load_state_dict(
                torch.load("static/model/mona_24000.pth", map_location=torch.device('cpu')))
            transformer.eval()
            filename = "mona{}".format(filename)
        elif model == 'starry':
            transformer.load_state_dict(
                torch.load("static/model/starry_night_10000.pth", map_location=torch.device('cpu')))
            transformer.eval()
            filename = "starry{}".format(filename)

        # Prepare input, downscaling large images by pixel count
        if a * b < 800000:
            image_tensor = Variable(
                transform(Image.open(style_img).convert("RGB"))).to(device)
            image_tensor = image_tensor.unsqueeze(0)
        elif 800000 < a * b < 1960000:
            image_tensor = Variable(
                transform(Image.open(style_img).convert("RGB").resize(
                    (int(a * 2 / 3), int(b * 2 / 3))))).to(device)
            image_tensor = image_tensor.unsqueeze(0)
        else:
            image_tensor = Variable(
                transform(Image.open(style_img).convert("RGB").resize(
                    (int(a / 2), int(b / 2))))).to(device)
            image_tensor = image_tensor.unsqueeze(0)

        with torch.no_grad():
            stylized_image = denormalize(transformer(image_tensor)).cpu()
        save_image(stylized_image, "./static/predict/result_{}".format(filename))
        # stylized_image.save("./static/predict/{}".format(filename))
        # The prediction is passed to the pipeline model
        return render_template("index.html", fileupload=True,
                               img_name="result_" + filename)

    return render_template("index.html", fileupload=False)
help="Path to video") parser.add_argument("--checkpoint_model", type=str, required=True, help="Path to checkpoint model") args = parser.parse_args() print(args) os.makedirs("images/outputs", exist_ok=True) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") transform = style_transform() # Define model and load model checkpoint transformer = TransformerNet().to(device) transformer.load_state_dict(torch.load(args.checkpoint_model, 'cpu')) transformer.eval() stylized_frames = [] for frame in tqdm.tqdm(extract_frames(args.video_path), desc="Processing frames"): # Prepare input frame image_tensor = Variable(transform(frame)).to(device).unsqueeze(0) # Stylize image with torch.no_grad(): stylized_image = transformer(image_tensor) # Add to frames stylized_frames += [deprocess(stylized_image)] # Create video from frames
import torch
from utils import *
from models import TransformerNet
import os
from torchvision import transforms
import time
import cv2
from cv2 import VideoWriter, VideoWriter_fourcc

TITLE = 'gogh'
STYLE_TRANSFORM_PATH = "gogh.pth"

device = ("cuda" if torch.cuda.is_available() else "cpu")

net = TransformerNet()
net.load_state_dict(
    torch.load(STYLE_TRANSFORM_PATH, map_location=torch.device(device)))
net = net.to(device)

videofile = "input.avi"
# videofile = 0  # use the webcam instead

cap = cv2.VideoCapture(videofile)
if videofile != 0:
    w = int(cap.get(3))
    h = int(cap.get(4))
    fps = cap.get(5)
    fourcc = VideoWriter_fourcc(*'mp4v')
    writer = VideoWriter('output.mp4', fourcc, fps, (w, h))

count = 0
while cap.isOpened():