def __init__(self, args):
    """Constructor prepares the model and loads the original images.

    Args:
        args: Parsed options object. The attributes read here are
            ``seed_random``, ``device``, ``content``, ``output_size``,
            ``style``, ``style_layers``, ``style_weights``,
            ``style_multiplier``, ``content_layers``, ``histogram_layers``
            and ``histogram_weights``. The object is mutated in place
            (comma-separated strings are replaced by lists) and stored as
            ``self.args``.
    """
    # NOTE(review): another ``__init__`` is defined further down in this file; if
    # both belong to the same class the later definition replaces this one -- confirm.
    super(StyleTransfer, self).__init__()

    # Setup state for random number generator for deterministic results.
    if args.seed_random is not None:
        torch.manual_seed(args.seed_random)

    # Load the convolution network from pre-trained parameters.
    self.device = torch.device(args.device)
    self.model = classifiers.VGG19Encoder().to(self.device)

    # Load the content image from disk or create an empty tensor.
    if args.content is not None:
        self.content_img = images.load_from_file(args.content, self.device)
    else:
        # Without a content image there is nothing to match, so disable the content loss.
        args.content_weights, args.content_layers = [], []
        # ``output_size`` is split on 'x' and reversed: the second number becomes ``h``.
        h, w = reversed(list(map(int, args.output_size.split('x'))))
        self.content_img = torch.zeros((1, 3, h, w), device=self.device)

    # Load the style image from disk to be processed during optimization.
    if args.style is not None:
        self.style_img = images.load_from_file(args.style, self.device)
        print(args.style_layers)
        args.style_layers = args.style_layers.split(',')
    else:
        args.style_weights, args.style_layers = [], []
        self.style_img = None
    self.seed_img = None

    # Compare by value: identity (`is not`) against a string literal is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if args.histogram_layers != '':
        args.histogram_layers = args.histogram_layers.split(',')
        args.histogram_weights = [float(w) for w in args.histogram_weights.split(',')]

    # Preprocess the various loss weights and decide which layers need to be computed.
    self.args = args
    print(self.args.style_weights)
    # ``style_weights`` is a comma-separated string when a style image was given, but
    # was replaced by an empty list above when it was not; only split the string form.
    raw_weights = self.args.style_weights
    if isinstance(raw_weights, str):
        raw_weights = raw_weights.split(',')
    self.args.style_weights = [float(w) * self.args.style_multiplier for w in raw_weights]
    print(self.args.style_weights)

    self.all_layers = (
        set(self.args.content_layers)
        | set(self.args.style_layers)
        | set(self.args.histogram_layers)
    )
    print(self.args.content_layers)
    print(self.args.style_layers)
    print(self.args.histogram_layers)
def __init__(self, args):
    """Constructor prepares the model and loads the original images.

    Args:
        args: Parsed options object. The attributes read here are
            ``seed_random``, ``device``, ``model``, ``pooling``, ``content``,
            ``content_size``, ``output_size``, ``style``, ``style_size``,
            ``style_multiplier``, ``style_layers``, ``style_weights``,
            ``content_layers``, ``histogram_layers``, ``histogram_weights``,
            ``iterations`` and ``scales``. The object is mutated in place
            (comma-separated strings are replaced by lists) and stored as
            ``self.args``.

    Raises:
        SystemExit: via ``exit()`` when ``args.model`` is not one of
            ``imagenet``, ``places``, ``placesFC`` or ``stylized``.
        AssertionError: when histogram layers are requested on a device
            other than ``cuda``.
    """
    super(StyleTransfer, self).__init__()

    # Setup state for random number generator for deterministic results.
    if args.seed_random is not None:
        torch.manual_seed(args.seed_random)

    # Load the convolution network from pre-trained parameters.
    self.cuda = args.device == 'cuda'
    self.device = torch.device(args.device)
    if args.model == "imagenet":
        self.model = classifiers.VGG19Encoder(pooling=args.pooling).to(self.device)
    elif args.model == "places":
        self.model = classifiers.VGG16Encoder(fn="data/vgg16places_enc.model", pooling=args.pooling).to(self.device)
    elif args.model == "placesFC":
        self.model = classifiers.VGG16FCEncoder(fn="data/vgg16places_fc_enc.model", pooling=args.pooling).to(self.device)
    elif args.model == "stylized":
        self.model = classifiers.VGG16Encoder(fn="data/vgg16stylized_enc.model", pooling=args.pooling).to(self.device)
    else:
        print("Unknown model: "+args.model)
        exit()
    print("Running on "+args.device)

    # Load the content image(s) from disk or create an empty tensor.
    if args.content is not None:
        content_size = None
        if args.content_size is not None:
            # NOTE(review): the size string is split on 'x' and reversed, so ``w`` receives
            # the SECOND number and ``h`` the first -- confirm the intended order of the
            # option and of the ``size`` tuple expected by ``images.load_from_file``.
            w, h = reversed(list(map(int, args.content_size.split('x'))))
            content_size = (w,h)
            print("Content image resized to h={}, w={}".format(h,w))
        content_files = args.content.split(',')
        print("Content taken from: ", content_files)
        self.content_imgs = [
            images.load_from_file(f, self.device, size=content_size)
            for f in content_files
        ]
    else:
        # Without a content image there is nothing to match, so disable the content loss.
        args.content_weights, args.content_layers = [], []
        w, h = reversed(list(map(int, args.output_size.split('x'))))
        self.content_imgs = [torch.zeros((1, 3, h, w), device=self.device)]

    # Load the style image(s) from disk to be processed during optimization.
    if args.style is not None:
        style_size = None
        if args.style_size is not None:
            w, h = reversed(list(map(int, args.style_size.split('x'))))
            style_size = (w,h)
            print("Style image resized to h={}, w={}".format(h,w))
        # A leading "@content" is substituted by the content path(s).
        if args.style.startswith("@content"):
            args.style = args.content + args.style.replace("@content", "")
        style_files = args.style.split(',')
        print("Style taken from: ",style_files)
        self.style_imgs = [
            images.load_from_file(f, self.device, size=style_size)
            for f in style_files
        ]
        # A single multiplier is repeated so that there is one per style image.
        self.style_multipliers = [float(w) for w in args.style_multiplier.split(',')]
        if len(self.style_multipliers) == 1:
            self.style_multipliers = self.style_multipliers * len(self.style_imgs)
        args.style_layers = args.style_layers.split(',')
    else:
        args.style_weights, args.style_layers = [], []
        self.style_img = None
        # ``run`` iterates ``self.style_imgs``; give it (and the multipliers) an empty
        # value so that running without a style image does not raise AttributeError.
        self.style_imgs = []
        self.style_multipliers = []
    self.seed_img = None

    # Compare by value: identity (`is not`) against a string literal is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if args.histogram_layers != '':
        assert args.device == "cuda", "Histogram currently only supported on GPU"
        args.histogram_layers = args.histogram_layers.split(',')
        args.histogram_weights = [float(w) for w in args.histogram_weights.split(',')]

    # Preprocess the various loss weights and decide which layers need to be computed.
    self.args = args
    # ``style_weights`` is a comma-separated string when a style image was given, but
    # was replaced by an empty list above when it was not; only split the string form.
    raw_weights = self.args.style_weights
    if isinstance(raw_weights, str):
        raw_weights = raw_weights.split(',')
    self.args.style_weights = [float(w) for w in raw_weights]
    self.all_layers = set(self.args.content_layers) | set(self.args.style_layers) | set(self.args.histogram_layers)

    # A single iteration count is repeated so that there is one entry per scale.
    self.iterations = self.args.iterations
    if len(self.iterations) == 1 and args.scales > 1:
        self.iterations = self.iterations * args.scales
    print(self.iterations)
def run(self):
    """Run the style transfer coarse-to-fine and write the final image to disk.

    One optimization pass is performed per scale (``self.args.scales`` in
    total). At each scale the content and style images are downscaled by
    ``2 ** (scales - scale - 1)``, the style/content statistics are
    recomputed, and ``self.optimize`` is started from a seed image. The
    upscaled result of a pass is kept in ``self.seed_img`` and seeds the
    next pass. After the last pass ``self.image`` is saved to
    ``self.args.output``, or to ``output/<basename>_final.png`` when no
    output path is given.
    """
    for scale in range(0, self.args.scales):
        self.scale = scale

        # Bring every input image down to the resolution of the current scale.
        factor = 2 ** (self.args.scales - self.scale - 1)
        content_imgs = [
            resize.DownscaleBuilder(factor, cuda=self.cuda).build(picture)
            for picture in self.content_imgs
        ]
        style_imgs = [
            resize.DownscaleBuilder(factor, cuda=self.cuda).build(picture)
            for picture in self.style_imgs
        ]
        reference = content_imgs[0]

        # Determine the starting point for the optimizer.
        if self.seed_img is not None:
            # There was a previous scale: resize its output and add a little normal noise.
            resized = resize.DownscaleBuilder(factor, cuda=self.cuda).build(self.seed_img)
            noise = torch.empty_like(reference).normal_(std=0.1)
            seed_img = (resized + noise).clamp_(-2.0, +2.0)
        elif self.args.seed is not None:
            # Load an image from disk; it has to be exactly the right size.
            seed_img = images.load_from_file(self.args.seed, self.device)
            assert seed_img.shape == reference.shape
        else:
            # Start from a completely random buffer drawn from a normal distribution.
            seed_img = torch.empty_like(reference).normal_(std=0.5).clamp_(-2.0, +2.0)

        # Cross-correlation statistics (gram matrices) of the style layers,
        # keyed by (style image index, layer).
        self.style_gram = {}
        for index, picture in enumerate(style_imgs):
            for layer, features in self.model.extract(picture, layers=self.args.style_layers):
                self.style_gram[index, layer] = histogram.square_matrix(features - 1.0).detach()

        # Feature histograms of the requested style layers, keyed the same way.
        self.style_hist = {}
        for index, picture in enumerate(style_imgs):
            for layer, features in self.model.extract(picture, layers=self.args.histogram_layers):
                self.style_hist[index, layer] = histogram.extract_histograms(
                    features, bins=5, min=torch.tensor(-1.0), max=torch.tensor(+4.0))

        # Content activations, keyed by (content image index, layer).
        self.content_feat = {}
        for index, picture in enumerate(content_imgs):
            for layer, features in self.model.extract(picture, layers=self.args.content_layers):
                self.content_feat[index, layer] = features.detach()

        # Optimize from the seed image with this scale's iteration budget.
        budget = self.iterations[self.scale]
        output = self.optimize(seed_img, budget)

        # The next scale reuses a bilinear-interpolated version of this output.
        upscaler = resize.UpscaleBuilder(factor, mode='bilinear')
        self.seed_img = upscaler.build(output).detach()

    # Save the final image at the finest scale to disk.
    source_path = self.args.content or self.args.style
    basename, _ = os.path.splitext(os.path.basename(source_path))
    final_image = self.image.clone().detach().cpu()
    destination = self.args.output or ('output/%s_final.png' % basename)
    images.save_to_file(final_image, destination)