Example #1
    def __init__(self, args):
        """Constructor prepares the model and loads the original images. 
        """
        super(StyleTransfer, self).__init__()

        # Set up the random number generator state for deterministic results.
        if args.seed_random is not None:
            torch.manual_seed(args.seed_random)

        # Load the convolution network from pre-trained parameters.
        self.device = torch.device(args.device)
        self.model = classifiers.VGG19Encoder().to(self.device)

        # Load the content image from disk or create an empty tensor.
        if args.content is not None:
            self.content_img = images.load_from_file(args.content, self.device)
        else:
            args.content_weights, args.content_layers = [], []
            h, w = reversed(list(map(int, args.output_size.split('x'))))
            self.content_img = torch.zeros((1, 3, h, w), device=self.device)

        # Load the style image from disk to be processed during optimization.
        if args.style is not None:
            self.style_img = images.load_from_file(args.style, self.device)
            args.style_layers = args.style_layers.split(',')
        else:
            args.style_weights, args.style_layers = [], []
            self.style_img = None

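        # Seed image for the optimizer; it carries the previous scale's output between coarse-to-fine passes.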
        self.seed_img = None

        if args.histogram_layers:
            args.histogram_layers = args.histogram_layers.split(',')
            args.histogram_weights = [
                float(w) for w in args.histogram_weights.split(',')
            ]

        # Preprocess the various loss weights and decide which layers need to be computed.
        self.args = args
        # Only parse the style weights when a style image was given; otherwise they were already cleared above.
        if self.args.style is not None:
            self.args.style_weights = [
                float(w) * self.args.style_multiplier
                for w in self.args.style_weights.split(',')
            ]
        self.all_layers = set(self.args.content_layers) | set(
            self.args.style_layers) | set(self.args.histogram_layers)
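
For reference, here is a minimal sketch of how this constructor might be driven from the command line. The flag names mirror the attributes read in the constructor above, but the defaults and the layer-name format are assumptions, and the classifiers/images modules must come from the repository itself:

# Hypothetical argparse wiring for the StyleTransfer constructor above.
# Flag names match the attributes read in the constructor; defaults are assumptions.
import argparse

parser = argparse.ArgumentParser(description="Neural style transfer")
parser.add_argument("--device", default="cpu")
parser.add_argument("--seed-random", type=int, default=None)
parser.add_argument("--content", default=None)
parser.add_argument("--style", default=None)
parser.add_argument("--output-size", default="512x512")            # "WxH"
parser.add_argument("--content-layers", default="4_1")             # assumed layer-name format
parser.add_argument("--style-layers", default="1_1,2_1,3_1,4_1")   # comma-separated
parser.add_argument("--style-weights", default="1.0,1.0,1.0,1.0")
parser.add_argument("--style-multiplier", type=float, default=1.0)
parser.add_argument("--histogram-layers", default="")
parser.add_argument("--histogram-weights", default="")

args = parser.parse_args(["--content", "photo.png", "--style", "painting.png"])
transfer = StyleTransfer(args)  # runs the constructor shown above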
Example #2
    def __init__(self, args):
        """Constructor prepares the model and loads the original images. 
        """
        super(StyleTransfer, self).__init__()

        # Set up the random number generator state for deterministic results.
        if args.seed_random is not None:
            torch.manual_seed(args.seed_random)

        # Load the convolution network from pre-trained parameters.
        self.cuda = args.device == 'cuda'
        self.device = torch.device(args.device)
        if args.model == "imagenet":
            self.model = classifiers.VGG19Encoder(pooling=args.pooling).to(self.device)
        elif args.model == "places":
            self.model = classifiers.VGG16Encoder(fn="data/vgg16places_enc.model", pooling=args.pooling).to(self.device)
        elif args.model == "placesFC":
            self.model = classifiers.VGG16FCEncoder(fn="data/vgg16places_fc_enc.model", pooling=args.pooling).to(self.device)
        elif args.model == "stylized":
            self.model = classifiers.VGG16Encoder(fn="data/vgg16stylized_enc.model", pooling=args.pooling).to(self.device)
        else:
            print("Unknown model: "+args.model)
            exit()
        print("Running on "+args.device)

        # Load the content image from disk or create an empty tensor.
        if args.content is not None:
            content_size = None
            if args.content_size is not None:
                h, w = reversed(list(map(int, args.content_size.split('x'))))
                content_size = (h, w)
                print("Content image resized to h={}, w={}".format(h, w))
            self.content_imgs = []
            content_files = args.content.split(',')
            print("Content taken from: ", content_files)
            for f in content_files:
                self.content_imgs.append(images.load_from_file(f, self.device, size=content_size))
        else:
            args.content_weights, args.content_layers = [], []
            h, w = reversed(list(map(int, args.output_size.split('x'))))
            self.content_imgs = [torch.zeros((1, 3, h, w), device=self.device)]

        # Load the style image from disk to be processed during optimization.
        if args.style is not None:
            style_size = None
            if args.style_size is not None:
                h, w = reversed(list(map(int, args.style_size.split('x'))))
                style_size = (h, w)
                print("Style image resized to h={}, w={}".format(h, w))
            if args.style.startswith("@content"):
                args.style = args.content + args.style.replace("@content", "")
            style_files = args.style.split(',')
            print("Style taken from: ",style_files)
            self.style_imgs = []
            for f in style_files:
                self.style_imgs.append(images.load_from_file(f, self.device, size=style_size))
            # A single multiplier is broadcast across every style image.
            self.style_multipliers = [float(w) for w in args.style_multiplier.split(',')]
            if len(self.style_multipliers) == 1:
                self.style_multipliers = self.style_multipliers * len(self.style_imgs)

            args.style_layers = args.style_layers.split(',')
        else:
            args.style_weights, args.style_layers = [], []
            self.style_imgs, self.style_multipliers = [], []

        self.seed_img = None

        if args.histogram_layers:
            assert args.device == "cuda", "Histogram currently only supported on GPU"
            args.histogram_layers = args.histogram_layers.split(',')
            args.histogram_weights = [float(w) for w in args.histogram_weights.split(',')]

        # Preprocess the various loss weights and decide which layers need to be computed.
        self.args = args
        # Only parse the style weights when a style image was given; otherwise they were already cleared above.
        if self.args.style is not None:
            self.args.style_weights = [float(w) for w in self.args.style_weights.split(',')]
        self.all_layers = set(self.args.content_layers) | set(self.args.style_layers) | set(self.args.histogram_layers)

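        # A single iteration count is broadcast across all coarse-to-fine scales.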
        self.iterations = self.args.iterations
        if len(self.iterations) == 1 and args.scales > 1:
            self.iterations = self.iterations * args.scales
        print("Iterations per scale:", self.iterations)
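
The "WxH" size strings above are parsed three times (content_size, style_size, output_size), and the two examples disagree about which component ends up in h. A small helper makes the convention explicit; this is a sketch that assumes the width-by-height order used in Example #1, and parse_size is a hypothetical name, not part of the repository:

import torch

# Hypothetical helper: turn a "WxH" string into (height, width) for NCHW tensors.
def parse_size(spec):
    """Parse e.g. '640x480' into (480, 640), i.e. (h, w)."""
    w, h = (int(v) for v in spec.split('x'))
    return h, w

h, w = parse_size("640x480")
content_img = torch.zeros((1, 3, h, w))  # NCHW buffer, as in the constructors above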
Example #3
    def run(self):
        """Main entry point for style transfer, operates coarse-to-fine as specified by the number of scales.
        """

        for self.scale in range(0, self.args.scales):
            # Pre-process the input images so they have the expected size.
            factor = 2 ** (self.args.scales - self.scale - 1)
            content_imgs = []
            for img in self.content_imgs:
                content_imgs.append(resize.DownscaleBuilder(factor, cuda=self.cuda).build(img))
            style_imgs = []
            for img in self.style_imgs:
                style_imgs.append(resize.DownscaleBuilder(factor, cuda=self.cuda).build(img))

            # Determine the starting point for the optimizer: was there an output from the previous scale?
            if self.seed_img is None:
                # a) Load an image from disk, this needs to be the exact right size.
                if self.args.seed is not None:
                    seed_img = images.load_from_file(self.args.seed, self.device)
                    assert seed_img.shape == content_imgs[0].shape, \
                        "Seed image must match the content resolution at this scale."

                # b) Use completely random buffer from a normal distribution.
                else:
                    seed_img = torch.empty_like(content_imgs[0]).normal_(std=0.5).clamp_(-2.0, +2.0)
            else:
                # c) There was a previous scale, so resize its output and add noise from a normal distribution.
                seed_img = (resize.DownscaleBuilder(factor, cuda=self.cuda).build(self.seed_img)
                           + torch.empty_like(content_imgs[0]).normal_(std=0.1)).clamp_(-2.0, +2.0)

            # Pre-compute the cross-correlation statistics for the style image layers (aka. gram matrices).
            self.style_gram = {}
            for n, img in enumerate(style_imgs):
                for i, f in self.model.extract(img, layers=self.args.style_layers):
                    self.style_gram[n, i] = histogram.square_matrix(f - 1.0).detach()
            # Pre-compute feature histograms for the style image layers specified.
            self.style_hist = {}
            for n, img in enumerate(style_imgs):
                for k, v in self.model.extract(img, layers=self.args.histogram_layers):
                    self.style_hist[n, k] = histogram.extract_histograms(
                        v, bins=5, min=torch.tensor(-1.0), max=torch.tensor(+4.0))
            # Prepare and store the content image activations for the content layers too.
            self.content_feat = {}
            for n, img in enumerate(content_imgs):
                for i, f in self.model.extract(img, layers=self.args.content_layers):
                    self.content_feat[n, i] = f.detach()
            # Now run the optimization using L-BFGS starting from the seed image.
            output = self.optimize(seed_img, self.iterations[self.scale])

            # For the next scale, we'll reuse a bilinearly interpolated version of this output.
            self.seed_img = resize.UpscaleBuilder(factor, mode='bilinear').build(output).detach()

        # Save the final image at the finest scale to disk.
        basename = os.path.splitext(os.path.basename(self.args.content or self.args.style))[0]
        images.save_to_file(self.image.clone().detach().cpu(), self.args.output or ('output/%s_final.png' % basename))
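
The "cross-correlation statistics (aka gram matrices)" pre-computed above are the standard style-transfer statistic: for a feature map of shape (1, C, H, W), the Gram matrix holds the CxC inner products between channel activations. The sketch below shows that computation in plain PyTorch; the exact normalization and the -1.0 offset applied before histogram.square_matrix in the repository are assumptions left out here:

import torch

def gram_matrix(feat):
    """Channel-by-channel cross-correlation of a (1, C, H, W) feature map.

    Sketch of the per-layer statistic stored in self.style_gram; the
    repository's histogram.square_matrix may normalize differently.
    """
    b, c, h, w = feat.shape
    flat = feat.view(b, c, h * w)       # flatten the spatial dimensions
    gram = flat @ flat.transpose(1, 2)  # (b, C, C) inner products between channels
    return gram / (c * h * w)           # assumed normalization

# Example with a fake relu3_1-sized activation map.
features = torch.randn(1, 256, 64, 64)
print(gram_matrix(features).shape)      # torch.Size([1, 256, 256])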