def get_infile():
    """Let the user choose the graphic to optimize via a file-open dialog.

    Rendering is switched to the CPU first.  A Tk root window is created
    before the dialog is shown and destroyed after it returns.

    Returns:
        The value returned by ``filedialog.askopenfilename`` for the
        user's selection.
    """
    pydiffvg.set_use_gpu(False)

    dialog_options = {
        "initialdir": ".",
        "title": "Select graphic to optimize",
        "filetypes": (("SVG files", "*.svg"), ("all files", "*.*")),
    }

    # NOTE: the root window is deliberately not withdrawn here.
    window = tk.Tk()
    chosen_path = filedialog.askopenfilename(**dialog_options)
    window.destroy()
    return chosen_path
import pydiffvg import torch import skimage import numpy as np # Use GPU if available pydiffvg.set_use_gpu(torch.cuda.is_available()) canvas_width, canvas_height = 256, 256 num_control_points = torch.tensor([2]) # points = torch.tensor([[120.0, 30.0], # base # [150.0, 60.0], # control point # [ 90.0, 198.0], # control point # [ 60.0, 218.0], # base # [ 90.0, 180.0], # control point # [200.0, 65.0], # control point # [210.0, 98.0], # base # [220.0, 70.0], # control point # [130.0, 55.0]]) # control point points = torch.tensor([ [20.0, 128.0], # base [50.0, 128.0], # control point [170.0, 128.0], # control point [200.0, 128.0] ]) # base path = pydiffvg.Path(num_control_points=num_control_points, points=points, is_closed=False, stroke_width=torch.tensor(10.0)) shapes = [path] path_group = pydiffvg.ShapeGroup(shape_ids=torch.tensor([0]),
def main(args):
    """Stylize a vector graphic by optimizing its shapes against a style image.

    The SVG at ``args.content_file`` is rendered once to produce the content
    target and ``args.style_img`` is loaded as the style target.  The scene's
    points, stroke widths and colors are then optimized through the
    differentiable renderer against VGG-19 content/style losses.  Renders are
    written under ``results/style_transfer/``.

    Args:
        args: namespace providing ``content_file`` and ``style_img``.

    Raises:
        AssertionError: if the style image and the rendered content image do
            not have the same size.
    """
    pydiffvg.set_use_gpu(torch.cuda.is_available())

    canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(
        args.content_file)
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)
    render = pydiffvg.RenderFunction.apply
    img = render(
        canvas_width,  # width
        canvas_height,  # height
        2,  # num_samples_x
        2,  # num_samples_y
        0,  # seed
        None,
        *scene_args)
    pydiffvg.imwrite(img.cpu(), 'results/style_transfer/init.png', gamma=1.0)
    # HWC -> NCHW
    img = img.unsqueeze(0)
    img = img.permute(0, 3, 1, 2)

    loader = transforms.Compose([transforms.ToTensor()])

    def image_loader(image_name):
        """Load an image file as a float tensor with a leading batch axis."""
        image = Image.open(image_name)
        # fake batch dimension required to fit network's input dimensions
        image = loader(image).unsqueeze(0)
        return image.to(pydiffvg.get_device(), torch.float)

    style_img = image_loader(args.style_img)
    # Alpha blend content with a gray background.  The background is created
    # on the render's device so the blend also works when rendering on GPU.
    content_img = img[:, :3, :, :] * img[:, 3, :, :] + \
        0.5 * torch.ones([1, 3, img.shape[2], img.shape[3]],
                         device=img.device) * \
        (1 - img[:, 3, :, :])

    assert style_img.size() == content_img.size(), \
        "we need to import style and content images of the same size"

    class ContentLoss(nn.Module):
        """Pass-through layer that records the MSE to a fixed content target."""

        def __init__(self, target):
            super(ContentLoss, self).__init__()
            # Detach the target so it is treated as a constant rather than
            # as part of the graph being differentiated.
            self.target = target.detach()

        def forward(self, input):
            self.loss = F.mse_loss(input, self.target)
            return input

    def gram_matrix(input):
        """Return the Gram matrix of a feature map, normalized by its size."""
        # a = batch size, b = number of feature maps, (c, d) = map dimensions
        a, b, c, d = input.size()
        features = input.view(a * b, c * d)
        G = torch.mm(features, features.t())
        # Normalize by the total number of elements in the feature maps.
        return G.div(a * b * c * d)

    class StyleLoss(nn.Module):
        """Pass-through layer that records the Gram-matrix MSE to a target."""

        def __init__(self, target_feature):
            super(StyleLoss, self).__init__()
            self.target = gram_matrix(target_feature).detach()

        def forward(self, input):
            G = gram_matrix(input)
            self.loss = F.mse_loss(G, self.target)
            return input

    device = pydiffvg.get_device()
    cnn = models.vgg19(pretrained=True).features.to(device).eval()

    cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
    cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)

    class Normalization(nn.Module):
        """Normalize an image batch so it can be the first Sequential stage."""

        def __init__(self, mean, std):
            super(Normalization, self).__init__()
            # Reshape to [C x 1 x 1] so the statistics broadcast against
            # image tensors of shape [B x C x H x W].
            self.mean = mean.clone().view(-1, 1, 1)
            self.std = std.clone().view(-1, 1, 1)

        def forward(self, img):
            return (img - self.mean) / self.std

    # Layers after which the content / style losses are inserted.
    content_layers_default = ['conv_4']
    style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

    def get_style_model_and_losses(cnn,
                                   normalization_mean,
                                   normalization_std,
                                   style_img,
                                   content_img,
                                   content_layers=content_layers_default,
                                   style_layers=style_layers_default):
        """Build a truncated copy of ``cnn`` with loss layers inserted.

        Returns:
            ``(model, style_losses, content_losses)``.
        """
        cnn = copy.deepcopy(cnn)

        normalization = Normalization(normalization_mean,
                                      normalization_std).to(device)

        content_losses = []
        style_losses = []

        # cnn is assumed to be an nn.Sequential; rebuild it layer by layer so
        # the loss modules can be interleaved.
        model = nn.Sequential(normalization)

        i = 0  # incremented every time a conv layer is seen
        for layer in cnn.children():
            if isinstance(layer, nn.Conv2d):
                i += 1
                name = 'conv_{}'.format(i)
            elif isinstance(layer, nn.ReLU):
                name = 'relu_{}'.format(i)
                # The in-place ReLU interferes with the inserted loss layers,
                # so swap in an out-of-place one.
                layer = nn.ReLU(inplace=False)
            elif isinstance(layer, nn.MaxPool2d):
                name = 'pool_{}'.format(i)
            elif isinstance(layer, nn.BatchNorm2d):
                name = 'bn_{}'.format(i)
            else:
                raise RuntimeError('Unrecognized layer: {}'.format(
                    layer.__class__.__name__))

            model.add_module(name, layer)

            if name in content_layers:
                target = model(content_img).detach()
                content_loss = ContentLoss(target)
                model.add_module("content_loss_{}".format(i), content_loss)
                content_losses.append(content_loss)

            if name in style_layers:
                target_feature = model(style_img).detach()
                style_loss = StyleLoss(target_feature)
                model.add_module("style_loss_{}".format(i), style_loss)
                style_losses.append(style_loss)

        # Trim off the layers after the last content/style loss.
        for i in range(len(model) - 1, -1, -1):
            if isinstance(model[i], ContentLoss) or isinstance(
                    model[i], StyleLoss):
                break

        model = model[:(i + 1)]

        return model, style_losses, content_losses

    def run_style_transfer(cnn,
                           normalization_mean,
                           normalization_std,
                           content_img,
                           style_img,
                           canvas_width,
                           canvas_height,
                           shapes,
                           shape_groups,
                           num_steps=500,
                           style_weight=5000,
                           content_weight=1):
        """Run the style transfer."""
        print('Building the style transfer model..')
        model, style_losses, content_losses = get_style_model_and_losses(
            cnn, normalization_mean, normalization_std, style_img,
            content_img)

        # Collect the scene parameters, grouped by the learning rate they use.
        point_params = []
        color_params = []
        stroke_width_params = []
        for shape in shapes:
            if isinstance(shape, pydiffvg.Path):
                point_params.append(shape.points.requires_grad_())
                stroke_width_params.append(
                    shape.stroke_width.requires_grad_())
        for shape_group in shape_groups:
            if isinstance(shape_group.fill_color, torch.Tensor):
                color_params.append(shape_group.fill_color.requires_grad_())
            elif isinstance(shape_group.fill_color, pydiffvg.LinearGradient):
                point_params.append(
                    shape_group.fill_color.begin.requires_grad_())
                point_params.append(
                    shape_group.fill_color.end.requires_grad_())
                color_params.append(
                    shape_group.fill_color.stop_colors.requires_grad_())
            if isinstance(shape_group.stroke_color, torch.Tensor):
                color_params.append(shape_group.stroke_color.requires_grad_())
            elif isinstance(shape_group.stroke_color,
                            pydiffvg.LinearGradient):
                point_params.append(
                    shape_group.stroke_color.begin.requires_grad_())
                point_params.append(
                    shape_group.stroke_color.end.requires_grad_())
                color_params.append(
                    shape_group.stroke_color.stop_colors.requires_grad_())

        point_optimizer = optim.Adam(point_params, lr=1.0)
        color_optimizer = optim.Adam(color_params, lr=0.01)
        stroke_width_optimizers = optim.Adam(stroke_width_params, lr=0.1)

        print('Optimizing..')
        run = [0]
        while run[0] <= num_steps:
            point_optimizer.zero_grad()
            color_optimizer.zero_grad()
            stroke_width_optimizers.zero_grad()

            scene_args = pydiffvg.RenderFunction.serialize_scene(
                canvas_width, canvas_height, shapes, shape_groups)
            render = pydiffvg.RenderFunction.apply
            img = render(
                canvas_width,  # width
                canvas_height,  # height
                2,  # num_samples_x
                2,  # num_samples_y
                0,  # seed
                None,
                *scene_args)
            # Alpha blend img with a gray background created on img's device.
            img = img[:, :, :3] * img[:, :, 3:4] + \
                0.5 * torch.ones([img.shape[0], img.shape[1], 3],
                                 device=img.device) * \
                (1 - img[:, :, 3:4])

            pydiffvg.imwrite(img.cpu(),
                             'results/style_transfer/step_{}.png'.format(
                                 run[0]),
                             gamma=1.0)

            # HWC to NCHW
            img = img.permute([2, 0, 1]).unsqueeze(0)
            # The forward pass populates .loss on every inserted loss layer.
            model(img)
            style_score = 0
            content_score = 0

            for sl in style_losses:
                style_score += sl.loss
            for cl in content_losses:
                content_score += cl.loss

            style_score *= style_weight
            content_score *= content_weight

            loss = style_score + content_score
            loss.backward()

            run[0] += 1
            print("run {}:".format(run))
            print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                style_score.item(), content_score.item()))
            print()

            point_optimizer.step()
            color_optimizer.step()
            stroke_width_optimizers.step()

            # Keep colors and stroke widths inside their valid ranges.
            for color in color_params:
                color.data.clamp_(0, 1)
            for w in stroke_width_params:
                w.data.clamp_(0.5, 4.0)

        return shapes, shape_groups

    shapes, shape_groups = run_style_transfer(cnn, cnn_normalization_mean,
                                              cnn_normalization_std,
                                              content_img, style_img,
                                              canvas_width, canvas_height,
                                              shapes, shape_groups)

    # serialize_scene needs the canvas size ahead of the shapes, exactly as
    # in the calls above.
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)
    render = pydiffvg.RenderFunction.apply
    img = render(
        canvas_width,  # width
        canvas_height,  # height
        2,  # num_samples_x
        2,  # num_samples_y
        0,  # seed
        None,
        *scene_args)
    pydiffvg.imwrite(img.cpu(), 'results/style_transfer/output.png',
                     gamma=1.0)
def train(args):
    """Train the vector MNIST autoencoder or VAE selected by ``args.generator``.

    Seeds the torch and numpy RNGs with 0, builds the dataset and model,
    resumes from the latest checkpoint if one exists, registers the progress,
    Visdom, sample and checkpointing callbacks, and runs the trainer.

    If a checkpoint exists whose stored meta-parameters differ from the ones
    given on the command line, the mismatch is logged and the function
    returns without training.

    Args:
        args: namespace providing ``cuda``, ``data_dir``, ``bs``,
            ``generator``, ``samples``, ``paths``, ``segments``, ``zdim``,
            ``conditional``, ``fc``, ``lr``, ``kld_weight``, ``freq`` and
            ``num_epochs``.

    Raises:
        ValueError: if ``args.generator`` is neither ``"vae"`` nor ``"ae"``.
    """
    th.manual_seed(0)
    np.random.seed(0)

    pydiffvg.set_use_gpu(args.cuda)

    # Initialize datasets
    imsize = 28
    dataset = Dataset(args.data_dir, imsize)
    dataloader = DataLoader(dataset, batch_size=args.bs,
                            num_workers=4, shuffle=True)

    if args.generator not in ("vae", "ae"):
        raise ValueError("unknown generator")

    LOG.info("Vector config:\n samples %d\n"
             " paths: %d\n segments: %d\n"
             " zdim: %d\n"
             " conditional: %d\n"
             " fc: %d\n",
             args.samples, args.paths, args.segments,
             args.zdim, args.conditional, args.fc)

    model_params = dict(samples=args.samples, paths=args.paths,
                        segments=args.segments, conditional=args.conditional,
                        zdim=args.zdim, fc=args.fc)

    variational = args.generator == "vae"
    model = VectorMNISTVAE(variational=variational, **model_params)
    if variational:
        chkpt = VAE_OUTPUT
        name = "mnist_vae"
    else:
        chkpt = AE_OUTPUT
        name = "mnist_ae"

    # Checkpoint directory and Visdom environment both encode the variant.
    if args.conditional:
        name += "_conditional"
        chkpt += "_conditional"

    if args.fc:
        name += "_fc"
        chkpt += "_fc"

    # Resume from checkpoint, if any
    checkpointer = ttools.Checkpointer(
        chkpt, model, meta=model_params, prefix="g_")
    _, meta = checkpointer.load_latest()

    if meta is not None and meta != model_params:
        LOG.info("Checkpoint's metaparams differ from CLI, aborting: %s and %s",
                 meta, model_params)
        # Actually abort, as the message states, instead of training a model
        # whose configuration disagrees with its checkpoint.
        return

    # Hook interface
    if variational:
        LOG.info("Using a VAE")
    else:
        LOG.info("Using an AE")
    interface = VAEInterface(model, lr=args.lr, cuda=args.cuda,
                             variational=variational, w_kld=args.kld_weight)

    trainer = ttools.Trainer(interface)

    # Add callbacks; the logged keys depend on the generator type.
    if variational:
        keys = ["kld", "data_loss", "loss"]
    else:
        keys = ["data_loss", "loss"]

    port = 8097
    trainer.add_callback(ttools.callbacks.ProgressBarCallback(
        keys=keys, val_keys=keys))
    trainer.add_callback(ttools.callbacks.VisdomLoggingCallback(
        keys=keys, val_keys=keys, env=name, port=port))
    trainer.add_callback(MNISTCallback(
        env=name, win="samples", port=port, frequency=args.freq))
    trainer.add_callback(ttools.callbacks.CheckpointingCallback(
        checkpointer, max_files=2, interval=600, max_epochs=50))

    # Start training
    trainer.train(dataloader, num_epochs=args.num_epochs)
default=16, type=int, help="number of output to compute") parser.add_argument("--imsize", type=int, help="if provided, override the raster output " "resolution") parser.add_argument("--nsteps", default=9, type=int, help="number of " "interpolation steps for the interpolation") parser.add_argument("--nframes", default=120, type=int, help="number of " "frames for the interpolation video") parser.add_argument("--invert", default=False, action="store_true", help="if True, render black on white rather than the" " opposite") args = parser.parse_args() pydiffvg.set_use_gpu(False) ttools.set_logger(False) run(args)
def main(args):
    """Fit randomly initialised vector paths to a target raster image.

    Creates ``args.num_paths`` random closed filled blobs (``args.use_blob``)
    or open strokes, then optimizes their points, colors and (for strokes)
    widths with Adam so the differentiable render matches ``args.target``
    under either an LPIPS-based or an L2 loss.  Intermediate renders, SVGs,
    the final render and an ffmpeg video are written under
    ``results/painterly_rendering/``.

    ``gamma`` is not defined in this function; it is read from the enclosing
    module scope.

    Args:
        args: namespace providing ``target``, ``num_paths``, ``max_width``,
            ``use_blob``, ``num_iter`` and ``use_lpips_loss``.
    """
    # Use GPU if available
    pydiffvg.set_use_gpu(torch.cuda.is_available())

    perception_loss = ttools.modules.LPIPS().to(pydiffvg.get_device())

    target = torch.from_numpy(skimage.io.imread(args.target)).to(
        torch.float32) / 255.0
    target = target.pow(gamma)
    target = target.to(pydiffvg.get_device())
    target = target.unsqueeze(0)
    target = target.permute(0, 3, 1, 2)  # NHWC -> NCHW
    canvas_width, canvas_height = target.shape[3], target.shape[2]
    num_paths = args.num_paths
    max_width = args.max_width

    random.seed(1234)
    torch.manual_seed(1234)

    radius = 0.05

    def jitter(point):
        """Return ``point`` displaced by a random offset (x drawn first)."""
        return (point[0] + radius * (random.random() - 0.5),
                point[1] + radius * (random.random() - 0.5))

    shapes = []
    shape_groups = []
    # Blobs and strokes share the same construction; only the segment count
    # range, the closing behaviour and fill-vs-stroke color differ.  The
    # order of random draws is the same as in the two separate loops.
    for _ in range(num_paths):
        if args.use_blob:
            num_segments = random.randint(3, 5)
        else:
            num_segments = random.randint(1, 3)
        num_control_points = torch.zeros(num_segments,
                                         dtype=torch.int32) + 2
        p0 = (random.random(), random.random())
        points = [p0]
        for j in range(num_segments):
            p1 = jitter(p0)
            p2 = jitter(p1)
            p3 = jitter(p2)
            points.append(p1)
            points.append(p2)
            # A closed path reuses its first point as the end of the last
            # segment, so that segment's end point is not stored.
            if not args.use_blob or j < num_segments - 1:
                points.append(p3)
            p0 = p3
        points = torch.tensor(points)
        points[:, 0] *= canvas_width
        points[:, 1] *= canvas_height
        path = pydiffvg.Path(num_control_points=num_control_points,
                             points=points,
                             stroke_width=torch.tensor(1.0),
                             is_closed=bool(args.use_blob))
        shapes.append(path)
        color = torch.tensor([
            random.random(),
            random.random(),
            random.random(),
            random.random()
        ])
        shape_ids = torch.tensor([len(shapes) - 1])
        if args.use_blob:
            path_group = pydiffvg.ShapeGroup(shape_ids=shape_ids,
                                             fill_color=color)
        else:
            path_group = pydiffvg.ShapeGroup(shape_ids=shape_ids,
                                             fill_color=None,
                                             stroke_color=color)
        shape_groups.append(path_group)

    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)

    render = pydiffvg.RenderFunction.apply
    img = render(
        canvas_width,  # width
        canvas_height,  # height
        2,  # num_samples_x
        2,  # num_samples_y
        0,  # seed
        None,
        *scene_args)
    pydiffvg.imwrite(img.cpu(),
                     'results/painterly_rendering/init.png',
                     gamma=gamma)

    # Collect the tensors to optimize.
    points_vars = []
    stroke_width_vars = []
    color_vars = []
    for path in shapes:
        path.points.requires_grad = True
        points_vars.append(path.points)
    if not args.use_blob:
        for path in shapes:
            path.stroke_width.requires_grad = True
            stroke_width_vars.append(path.stroke_width)
    for group in shape_groups:
        color = group.fill_color if args.use_blob else group.stroke_color
        color.requires_grad = True
        color_vars.append(color)

    # Optimize
    points_optim = torch.optim.Adam(points_vars, lr=1.0)
    width_optim = None
    if stroke_width_vars:
        width_optim = torch.optim.Adam(stroke_width_vars, lr=0.1)
    color_optim = torch.optim.Adam(color_vars, lr=0.01)

    # Adam iterations.
    for t in range(args.num_iter):
        print('iteration:', t)
        points_optim.zero_grad()
        if width_optim is not None:
            width_optim.zero_grad()
        color_optim.zero_grad()

        # Forward pass: render the image.
        scene_args = pydiffvg.RenderFunction.serialize_scene(
            canvas_width, canvas_height, shapes, shape_groups)
        img = render(
            canvas_width,  # width
            canvas_height,  # height
            2,  # num_samples_x
            2,  # num_samples_y
            t,  # seed
            None,
            *scene_args)
        # Compose img with white background
        img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(
            img.shape[0], img.shape[1], 3,
            device=pydiffvg.get_device()) * (1 - img[:, :, 3:4])
        # Save the intermediate render.
        pydiffvg.imwrite(img.cpu(),
                         'results/painterly_rendering/iter_{}.png'.format(t),
                         gamma=gamma)
        img = img[:, :, :3]
        # Convert img from HWC to NCHW
        img = img.unsqueeze(0)
        img = img.permute(0, 3, 1, 2)  # NHWC -> NCHW
        if args.use_lpips_loss:
            loss = perception_loss(
                img, target) + (img.mean() - target.mean()).pow(2)
        else:
            loss = (img - target).pow(2).mean()
        print('render loss:', loss.item())

        # Backpropagate the gradients.
        loss.backward()

        # Take a gradient descent step.
        points_optim.step()
        if width_optim is not None:
            width_optim.step()
        color_optim.step()

        # Project the parameters back into their valid ranges.
        if width_optim is not None:
            for path in shapes:
                path.stroke_width.data.clamp_(1.0, max_width)
        for color in color_vars:
            color.data.clamp_(0.0, 1.0)

        if t % 10 == 0 or t == args.num_iter - 1:
            pydiffvg.save_svg(
                'results/painterly_rendering/iter_{}.svg'.format(t),
                canvas_width, canvas_height, shapes, shape_groups)

    # Render the final result at the canvas size.  ``target`` is NCHW here,
    # so its leading axes are batch and channel, not image dimensions.  The
    # scene is re-serialized so the render reflects the last update.
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)
    img = render(
        canvas_width,  # width
        canvas_height,  # height
        2,  # num_samples_x
        2,  # num_samples_y
        0,  # seed
        None,
        *scene_args)
    # Save the final render.
    pydiffvg.imwrite(img.cpu(),
                     'results/painterly_rendering/final.png',
                     gamma=gamma)

    # Convert the intermediate renderings to a video.
    from subprocess import call
    call([
        "ffmpeg", "-framerate", "24", "-i",
        "results/painterly_rendering/iter_%d.png", "-vb", "20M",
        "results/painterly_rendering/out.mp4"
    ])
def gen_and_optimize(self, writer=None, color_optimisation_activated=False):
    """Generate a set of curves and optimize them against the model loss.

    Shapes come from ``self.generator_func()``.  On every iteration an image
    is produced by ``self.gen_image_from_curves``, augmented by
    ``self.data_augment`` and scored by ``self.forward_model_func``; the
    resulting loss is back-propagated into the path points and stroke widths,
    and into the stroke colors when ``color_optimisation_activated`` is set.
    The learning rate of the points optimizer is halved at 50% and again at
    75% of ``self.num_iter``.

    Args:
        writer: optional logger exposing ``add_scalars`` and ``add_image``
            (presumably a TensorBoard ``SummaryWriter`` -- confirm with the
            caller).  Logged roughly ten times over the run.
        color_optimisation_activated: whether the stroke colors are updated.

    Returns:
        ``(shapes, shape_groups)`` after optimization.
    """
    # Thanks to Katherine Crowson for this.
    # In the CLIPDraw code used to generate examples, images are not
    # normalized before being passed into CLIP, but really they should be.
    use_normalized_clip = True

    pydiffvg.set_print_timing(False)

    gamma = 1.0

    # Use GPU if available
    pydiffvg.set_use_gpu(torch.cuda.is_available())
    pydiffvg.set_device(device)

    max_width = 50

    shapes, shape_groups = self.generator_func()

    # Just some diffvg setup
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        self.canvas_width, self.canvas_height, shapes, shape_groups)
    render = pydiffvg.RenderFunction.apply
    img = render(self.canvas_width, self.canvas_height, 2, 2, 0, None,
                 *scene_args)
    background_image = torch.ones(img.shape)

    points_vars = []
    for path in shapes:
        path.points.requires_grad = True
        points_vars.append(path.points)
    color_vars = []
    for group in shape_groups:
        group.stroke_color.requires_grad = True
        color_vars.append(group.stroke_color)
    stroke_vars = []
    for path in shapes:
        path.stroke_width.requires_grad = True
        stroke_vars.append(path.stroke_width)

    # Optimizers
    points_optim = torch.optim.Adam(points_vars, lr=1.0)
    color_optim = torch.optim.Adam(color_vars, lr=0.1)
    stroke_optim = torch.optim.Adam(stroke_vars, lr=0.01)

    # Log about ten times per run.  The interval is floored at 1 so runs
    # shorter than ten iterations do not take a modulo by zero.
    log_every = max(1, int(self.num_iter / 10))

    # Run the main optimization loop
    for t in range(self.num_iter):

        # Anneal learning rate (makes videos look cleaner)
        if t == int(self.num_iter * 0.5):
            print(f"Iter {t}")
            for g in points_optim.param_groups:
                g['lr'] *= 0.5
        if t == int(self.num_iter * 0.75):
            print(f"Iter {t}")
            for g in points_optim.param_groups:
                g['lr'] *= 0.5

        points_optim.zero_grad()
        if color_optimisation_activated:
            color_optim.zero_grad()
        stroke_optim.zero_grad()

        img = self.gen_image_from_curves(t, shapes, shape_groups, gamma,
                                         background_image)
        im_batch = self.data_augment(img, self.n_augms, use_normalized_clip)
        loss = self.forward_model_func(im_batch)

        # Back-propagate the gradients.
        loss.backward()

        # Take a gradient descent step.
        points_optim.step()
        if color_optimisation_activated:
            color_optim.step()
        stroke_optim.step()

        # Keep stroke widths and colors inside their valid ranges.
        for path in shapes:
            path.stroke_width.data.clamp_(1.0, max_width)
        for group in shape_groups:
            group.stroke_color.data.clamp_(0.0, 1.0)

        if writer is not None and t % log_every == 0:
            writer.add_scalars("neuron_excitation", {"loss": loss}, t)
            writer.add_image('Rendering', img[0], t)

    return shapes, shape_groups
def main(args):
    """Draw a text prompt with vector paths, adding paths in stages.

    Paths are added ``step`` at a time until ``args.open_paths +
    args.closed_paths`` exist.  After each addition all paths are optimized
    with Adam to minimize the negative cosine similarity between the
    embedding of ``args.prompt`` and the embedding of the rendered image.
    Intermediate renders, SVGs, a loss plot, the final render and a video are
    written to ``<results_dir>/<prompt>/<subdir>``.

    ``radius`` and ``gamma`` are not defined in this function; they are read
    from the enclosing module scope.

    Args:
        args: namespace providing ``seed``, ``results_dir``, ``prompt``,
            ``subdir``, ``initial_margin``, ``open_paths``, ``closed_paths``,
            ``step``, ``extra_segments``, ``max_width``, ``points_lr``,
            ``width_lr``, ``color_lr``, ``num_iter``, ``extra_iter``,
            ``min_alpha`` and ``final_px``.
    """
    if args.seed:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)

    pydiffvg.set_print_timing(False)

    outdir = os.path.join(args.results_dir, args.prompt, args.subdir)

    # Use GPU if available
    pydiffvg.set_use_gpu(torch.cuda.is_available())

    canvas_width, canvas_height = 224, 224
    margin = args.initial_margin
    total_paths = args.open_paths + args.closed_paths
    step = min(args.step, total_paths)
    if step == 0:
        step = total_paths

    def nudge(point):
        """Return ``point`` moved by a random offset (x drawn before y)."""
        new_x = point[0] + radius * (random.random() - 0.5)
        new_y = point[1] + radius * (random.random() - 0.5)
        return (new_x, new_y)

    shapes = []
    shape_groups = []
    losses = []
    tt = 0  # global iteration counter across all stages
    for num_paths in range(step, total_paths + 1, step):
        # Add this stage's batch of new paths.
        for path_index in range(num_paths - step, num_paths):
            num_segments = random.randint(1, args.extra_segments + 1)
            anchor = (margin + random.random() * (1 - 2 * margin),
                      margin + random.random() * (1 - 2 * margin))
            point_list = [anchor]
            is_closed = path_index >= args.open_paths
            if is_closed:
                num_segments += 2
            for seg in range(num_segments):
                ctrl_a = nudge(anchor)
                ctrl_b = nudge(ctrl_a)
                end_pt = nudge(ctrl_b)
                point_list.append(ctrl_a)
                point_list.append(ctrl_b)
                # The last segment of a closed path ends at the first point,
                # so its end point is not stored.
                if not is_closed or seg < num_segments - 1:
                    point_list.append(end_pt)
                    anchor = end_pt
            points = torch.tensor(point_list)
            points[:, 0] *= canvas_width
            points[:, 1] *= canvas_height
            stroke_width = torch.tensor(1.0)
            color = torch.tensor([
                random.random(),
                random.random(),
                random.random(),
                random.random()
            ])
            num_control_points = torch.zeros(num_segments,
                                             dtype=torch.int32) + 2
            shapes.append(
                pydiffvg.Path(num_control_points=num_control_points,
                              points=points,
                              stroke_width=stroke_width,
                              is_closed=is_closed))
            # Closed paths are filled, open paths are stroked.
            if is_closed:
                fill, stroke = color, None
            else:
                fill, stroke = None, color
            shape_groups.append(
                pydiffvg.ShapeGroup(
                    shape_ids=torch.tensor([len(shapes) - 1]),
                    fill_color=fill,
                    stroke_color=stroke))

        scene_args = pydiffvg.RenderFunction.serialize_scene(
            canvas_width, canvas_height, shapes, shape_groups)

        render = pydiffvg.RenderFunction.apply
        img = render(
            canvas_width,  # width
            canvas_height,  # height
            2,  # num_samples_x
            2,  # num_samples_y
            0,  # seed
            None,
            *scene_args)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            pydiffvg.imwrite(img.cpu(),
                             os.path.join(outdir, 'init.png'),
                             gamma=gamma)

        # Gather the tensors to optimize for this stage.
        points_vars = []
        stroke_width_vars = []
        color_vars = []
        for path in shapes:
            path.points.requires_grad = True
            points_vars.append(path.points)
            if not path.is_closed and args.max_width > 1:
                path.stroke_width.requires_grad = True
                stroke_width_vars.append(path.stroke_width)
        for group in shape_groups:
            if group.fill_color is not None:
                group.fill_color.requires_grad = True
                color_vars.append(group.fill_color)
            else:
                group.stroke_color.requires_grad = True
                color_vars.append(group.stroke_color)

        # Embed prompt
        text_features = clip_utils.embed_text(args.prompt)

        # Optimize
        points_optim = torch.optim.Adam(points_vars, lr=args.points_lr)
        has_widths = len(stroke_width_vars) > 0
        if has_widths:
            width_optim = torch.optim.Adam(stroke_width_vars,
                                           lr=args.width_lr)
        color_optim = torch.optim.Adam(color_vars, lr=args.color_lr)

        # The iteration budget is proportional to the share of paths added;
        # the last stage gets the extra iterations.
        final = False
        this_step_iters = max(1, round(args.num_iter * step / total_paths))
        if num_paths + step > total_paths:
            final = True
            this_step_iters += args.extra_iter

        # Adam iterations.
        for t in range(this_step_iters):
            points_optim.zero_grad()
            if has_widths:
                width_optim.zero_grad()
            color_optim.zero_grad()

            # Forward pass: render the image.
            scene_args = pydiffvg.RenderFunction.serialize_scene(
                canvas_width, canvas_height, shapes, shape_groups)
            img = render(
                canvas_width,  # width
                canvas_height,  # height
                2,  # num_samples_x
                2,  # num_samples_y
                tt,  # seed
                None,
                *scene_args)

            # Save the intermediate render.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                pydiffvg.imwrite(img.cpu(),
                                 os.path.join(outdir,
                                              'iter_{}.png'.format(tt)),
                                 gamma=gamma)

            image_features = clip_utils.embed_image(img)
            loss = -torch.cosine_similarity(
                text_features, image_features, dim=-1).mean()

            # Backpropagate the gradients.
            loss.backward()
            losses.append(loss.item())

            # Take a gradient descent step.
            points_optim.step()
            if has_widths:
                width_optim.step()
            color_optim.step()

            # Project every parameter back into its valid range.
            for path in shapes:
                path.points.data[:, 0].clamp_(0.0, canvas_width)
                path.points.data[:, 1].clamp_(0.0, canvas_height)
                if not path.is_closed:
                    path.stroke_width.data.clamp_(1.0, args.max_width)
            for group in shape_groups:
                if group.fill_color is not None:
                    rgba = group.fill_color
                else:
                    rgba = group.stroke_color
                rgba.data[:3].clamp_(0.0, 1.0)
                rgba.data[3].clamp_(args.min_alpha, 1.0)

            is_last_iter = final and t == this_step_iters - 1
            if tt % 10 == 0 or is_last_iter:
                print('%d loss=%.3f' % (tt, 1 + losses[-1]))
                pydiffvg.save_svg(
                    os.path.join(outdir, 'iter_{}.svg'.format(tt)),
                    canvas_width, canvas_height, shapes, shape_groups)
                clip_utils.plot_losses(losses, outdir)
            tt += 1

    # Render the final result.
    img = render(
        args.final_px,  # width
        args.final_px,  # height
        2,  # num_samples_x
        2,  # num_samples_y
        0,  # seed
        None,
        *scene_args)
    # Save the final render
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        pydiffvg.imwrite(img.cpu(),
                         os.path.join(outdir, 'final.png'),
                         gamma=gamma)

    # Convert the intermediate renderings to a video with a white background.
    from subprocess import call
    call([
        "ffmpeg", "-framerate", "24", "-i",
        os.path.join(outdir, "iter_%d.png"), "-vb", "20M",
        "-filter_complex",
        "color=white,format=rgb24[c];[c][0]scale2ref[c][i];[c][i]overlay=format=auto:shortest=1,setsar=1",
        "-c:v", "libx264", "-pix_fmt", "yuv420p", "-profile:v", "baseline",
        "-movflags", "+faststart",
        os.path.join(outdir, "out.mp4")
    ])
def main():
    """Retarget a vector graphic to a narrower canvas using seam carving.

    Parses ``--svg``, ``--optim_steps`` and ``--lr`` from the command line,
    loads the SVG and rescales it so its longer side is 512 px.  A uniformly
    scaled baseline is saved first.  Then one third of the canvas width is
    removed one seam at a time: each seam is carved from the raster
    reference, and the path points are optimized with a fresh Adam optimizer
    to match the carved image.  Renders and SVGs go to
    ``results/seam_carving/<svg name>/`` and a video is assembled with
    ffmpeg.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--svg", default=os.path.join("imgs", "seamcarving",
                                                      "hokusai.svg"))
    parser.add_argument("--optim_steps", default=10, type=int)
    # The learning rate is fractional (default 0.1), so it is parsed as a
    # float; with an int type every non-integer value would be rejected.
    parser.add_argument("--lr", default=1e-1, type=float)
    args = parser.parse_args()

    name = os.path.splitext(os.path.basename(args.svg))[0]
    root = os.path.join("results", "seam_carving", name)
    svg_root = os.path.join(root, "svg")
    os.makedirs(root, exist_ok=True)
    os.makedirs(svg_root, exist_ok=True)

    pydiffvg.set_use_gpu(False)

    # Load SVG
    print("loading svg %s" % args.svg)
    canvas_width, canvas_height, shapes, shape_groups = \
        pydiffvg.svg_to_scene(args.svg)
    print("done loading")

    max_size = 512
    scale_factor = max_size / max(canvas_width, canvas_height)
    print("rescaling from %dx%d with scale %f" %
          (canvas_width, canvas_height, scale_factor))
    canvas_width = int(canvas_width * scale_factor)
    canvas_height = int(canvas_height * scale_factor)
    print("new shape %dx%d" % (canvas_width, canvas_height))
    vector_rescale(shapes, scale_x=scale_factor, scale_y=scale_factor)

    # Shrink image by 33 %
    num_seams_to_remove = canvas_width // 3
    new_canvas_width = canvas_width - num_seams_to_remove
    scaling = new_canvas_width * 1.0 / canvas_width

    # Naive scaling baseline
    print("rendering naive rescaling...")
    vector_rescale(shapes, scale_x=scaling)
    resized = render(new_canvas_width, canvas_height, shapes, shape_groups)
    pydiffvg.imwrite(resized.cpu(),
                     os.path.join(root, 'uniform_scaling.png'), gamma=2.2)
    pydiffvg.save_svg(os.path.join(svg_root, 'uniform_scaling.svg'),
                      canvas_width, canvas_height, shapes, shape_groups,
                      use_gamma=False)
    # bring back original coordinates
    vector_rescale(shapes, scale_x=1.0 / scaling)
    print("saved naiving scaling")

    # Save initial state
    print("rendering initial state...")
    im = render(canvas_width, canvas_height, shapes, shape_groups)
    pydiffvg.imwrite(im.cpu(), os.path.join(root, 'init.png'), gamma=2.2)
    pydiffvg.save_svg(os.path.join(svg_root, 'init.svg'),
                      canvas_width, canvas_height, shapes, shape_groups,
                      use_gamma=False)
    print("saved initial state")

    # Optimize
    retargeted = im[..., :3].cpu().numpy()
    previous_width = canvas_width
    print("carving seams")
    for seam_idx in range(num_seams_to_remove):
        print('\nseam', seam_idx + 1, 'of', num_seams_to_remove)

        # Remove a seam
        retargeted = carve_seam(retargeted)

        current_width = canvas_width - seam_idx - 1
        scale_factor = current_width * 1.0 / previous_width
        previous_width = current_width

        # Pad the carved reference back to the full canvas so it can be
        # compared against a full-size render; the padding has zero alpha.
        padded = np.zeros((canvas_height, canvas_width, 4))
        padded[:, :-seam_idx - 1, :3] = retargeted
        padded[:, :-seam_idx - 1, -1] = 1.0  # alpha
        padded = th.from_numpy(padded).to(im.device)

        # Remap points to the smaller canvas and
        # collect variables to optimize
        points_vars = []
        mini, maxi = canvas_width, 0
        for path in shapes:
            # Gradients are switched off while the points are edited in place.
            path.points.requires_grad = False
            x = path.points[..., 0]
            y = path.points[..., 1]

            # rescale
            x = x * scale_factor

            # clip to canvas
            path.points[..., 0] = th.clamp(x, 0, current_width)
            path.points[..., 1] = th.clamp(y, 0, canvas_height)

            path.points.requires_grad = True
            points_vars.append(path.points)
            path.stroke_width.requires_grad = True

            mini = min(mini, path.points.min().item())
            maxi = max(maxi, path.points.max().item())
        print("points", mini, maxi, "scale", scale_factor)

        # Recreate the optimizer for every seam so the previous seam's Adam
        # state (momentum) is not carried over.
        geom_optim = th.optim.Adam(points_vars, lr=args.lr)

        for step in range(args.optim_steps):
            geom_optim.zero_grad()

            img = render(canvas_width, canvas_height, shapes, shape_groups,
                         samples=2)

            pydiffvg.imwrite(
                img.cpu(),
                os.path.join(root, "seam_%03d_iter_%02d.png" %
                             (seam_idx, step)), gamma=2.2)

            # NO alpha
            loss = (img - padded)[..., :3].pow(2).mean()
            print('render loss:', loss.item())

            # Backpropagate the gradients.
            loss.backward()

            # Take a gradient descent step.
            geom_optim.step()

        pydiffvg.save_svg(os.path.join(svg_root, "seam%03d.svg" % seam_idx),
                          canvas_width - seam_idx, canvas_height, shapes,
                          shape_groups, use_gamma=False)

        for path in shapes:
            mini = min(mini, path.points.min().item())
            maxi = max(maxi, path.points.max().item())
        print("points", mini, maxi)

    # Final full-size render, cropped to the retargeted width.
    img = render(canvas_width, canvas_height, shapes, shape_groups)
    img = img[:, :-num_seams_to_remove]

    pydiffvg.imwrite(img.cpu(), os.path.join(root, 'final.png'),
                     gamma=2.2)
    pydiffvg.imwrite(retargeted, os.path.join(root, 'ref.png'),
                     gamma=2.2)

    pydiffvg.save_svg(os.path.join(svg_root, 'final.svg'),
                      canvas_width - num_seams_to_remove + 1, canvas_height,
                      shapes, shape_groups, use_gamma=False)

    # Convert the intermediate renderings to a video.
    from subprocess import call
    call(["ffmpeg", "-framerate", "24", "-i",
          os.path.join(root, "seam_%03d_iter_00.png"), "-vb", "20M",
          os.path.join(root, "out.mp4")])
def main(args):
    """Optimize an SVG so its render matches a synthesized texture target.

    A target image is synthesized from ``args.target``, the SVG at
    ``args.svg_path`` is rescaled to the target's height, and its path points
    and fill colors are optimized with Adam under an L2 loss against the
    target.  Renders, SVGs, an ffmpeg video and a GIF are written under
    ``results/texture_synthesis/<case>/``.

    Args:
        args: namespace providing ``target``, ``svg_path``, ``case`` (an
            integer used in the output directory name) and ``max_iter``.
    """
    # Rendering runs on the CPU.
    pydiffvg.set_use_gpu(False)

    # generate a texture synthesized
    target_img = texture_syn(args.target)
    # target_img is laid out height x width x channels (it is treated as
    # NHWC after unsqueeze below), so axis 0 is the height and axis 1 the
    # width.
    tar_h, tar_w = target_img.shape[0], target_img.shape[1]
    canvas_width, canvas_height, shapes, shape_groups = \
        pydiffvg.svg_to_scene(args.svg_path)

    print('tar h : %d tar w : %d' % (tar_h, tar_w))
    print('canvas h : %d canvas w : %d' % (canvas_height, canvas_width))
    scale_ratio = tar_h / canvas_height
    print("scale ratio : ", scale_ratio)

    # Scale the geometry uniformly so the scene height matches the target.
    for path in shapes:
        path.points[..., 0] = path.points[..., 0] * scale_ratio
        path.points[..., 1] = path.points[..., 1] * scale_ratio

    init_img = render(tar_w, tar_h, shapes, shape_groups)
    pydiffvg.imwrite(init_img.cpu(),
                     'results/texture_synthesis/%d/init.png' % (args.case),
                     gamma=2.2)

    random.seed(1234)
    torch.manual_seed(1234)

    points_vars = []
    for path in shapes:
        path.points.requires_grad = True
        points_vars.append(path.points)
    color_vars = []
    for group in shape_groups:
        group.fill_color.requires_grad = True
        color_vars.append(group.fill_color)

    # Optimize
    points_optim = torch.optim.Adam(points_vars, lr=1.0)
    color_optim = torch.optim.Adam(color_vars, lr=0.01)

    target = torch.from_numpy(target_img).to(torch.float32) / 255.0
    target = target.pow(2.2)
    target = target.to(pydiffvg.get_device())
    target = target.unsqueeze(0)
    target = target.permute(0, 3, 1, 2)  # NHWC -> NCHW
    canvas_width, canvas_height = target.shape[3], target.shape[2]

    for t in range(args.max_iter):
        print('iteration:', t)
        points_optim.zero_grad()
        color_optim.zero_grad()

        cur_img = render(canvas_width, canvas_height, shapes, shape_groups)
        pydiffvg.imwrite(cur_img.cpu(),
                         'results/texture_synthesis/%d/iter_%d.png' %
                         (args.case, t),
                         gamma=2.2)
        cur_img = cur_img[:, :, :3]
        cur_img = cur_img.unsqueeze(0)
        cur_img = cur_img.permute(0, 3, 1, 2)  # NHWC -> NCHW

        # l2 loss
        loss = (cur_img - target).pow(2).mean()
        print('render loss:', loss.item())
        loss.backward()

        points_optim.step()
        color_optim.step()

        for group in shape_groups:
            group.fill_color.data.clamp_(0.0, 1.0)

        # write svg
        if t % 10 == 0 or t == args.max_iter - 1:
            pydiffvg.save_svg(
                'results/texture_synthesis/%d/iter_%d.svg' % (args.case, t),
                canvas_width, canvas_height, shapes, shape_groups)

    # render final result
    final_img = render(tar_w, tar_h, shapes, shape_groups)
    pydiffvg.imwrite(final_img.cpu(),
                     'results/texture_synthesis/%d/final.png' % (args.case),
                     gamma=2.2)

    from subprocess import call
    call([
        "ffmpeg", "-framerate", "24", "-i",
        # '%%d' leaves a literal '%d' in the path for ffmpeg's frame counter;
        # only the case number is substituted here.
        "results/texture_synthesis/%d/iter_%%d.png" % (args.case), "-vb",
        "20M",
        "results/texture_synthesis/%d/out.mp4" % (args.case)
    ])

    # make gif
    make_gif("results/texture_synthesis/%d" % (args.case),
             "results/texture_synthesis/%d/out.gif" % (args.case),
             frame_every_X_steps=1,
             repeat_ending=3,
             total_iter=args.max_iter)