def __init__(self, requires_grad=False):
    """Build VGG-19 (Caffe weights) split into six feature slices.

    Each ``slice_reluX_Y`` attribute is an ``nn.Sequential`` covering the
    pretrained feature layers up to (but not including) the next slice
    boundary, so intermediate activations can be read per slice.

    :param requires_grad: when False (default), freeze every parameter.
    """
    super(Vgg19_Caffe, self).__init__()
    pretrained = loadCaffemodel(
        model_file='./models/vgg19-d01eb7cb.pth',
        pooling='max',
        use_gpu='1',
        disable_check=False,
    ).features
    # (attribute name, start index, stop index) into the pretrained features.
    bounds = (
        ('slice_relu1_1', 0, 2),
        ('slice_relu2_1', 2, 7),
        ('slice_relu3_1', 7, 12),
        ('slice_relu4_1', 12, 21),
        ('slice_relu4_2', 21, 23),
        ('slice_relu5_1', 23, 30),
    )
    # Create all (initially empty) slice containers first, matching the
    # original attribute-creation order.
    for attr, _, _ in bounds:
        setattr(self, attr, torch.nn.Sequential())
    print('vgg_pretrained_features', pretrained)
    # Populate each slice; submodule names keep the global layer index.
    for attr, start, stop in bounds:
        container = getattr(self, attr)
        for idx in range(start, stop):
            container.add_module(str(idx), pretrained[idx])
    if not requires_grad:
        for param in self.parameters():
            param.requires_grad = False
def main():
    """Save every channel of one layer's activations as a grayscale image.

    Builds the network up to ``params.layer``, runs ``params.input_image``
    through it, and writes one image per feature-map channel into
    ``params.output_dir``.
    """
    # Build the model definition and setup pooling layers:
    cnn, layerList = loadCaffemodel(params.model_file, params.pooling, -1)
    img, image_size = preprocess(params.input_image, params.image_size)
    output_filename, file_extension = os.path.splitext(params.output_image)
    try:
        os.makedirs(params.output_dir)
    except OSError:
        pass  # output directory already exists — best-effort creation

    # Copy layers into a truncated net, stopping once the target layer
    # (conv / relu / pool name from layerList) has been appended.
    cnn = copy.deepcopy(cnn)
    net = nn.Sequential()
    c, r, p = 0, 0, 0
    convName, reluName, poolName = None, None, None
    for layer in list(cnn):
        if isinstance(layer, nn.Conv2d):
            net.add_module(str(len(net)), layer)
            convName = layerList['C'][c]
            c += 1
        if isinstance(layer, nn.ReLU):
            net.add_module(str(len(net)), layer)
            reluName = layerList['R'][r]
            r += 1
        if isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
            net.add_module(str(len(net)), layer)
            poolName = layerList['P'][p]
            p += 1
        if convName == params.layer or reluName == params.layer or poolName == params.layer:
            break

    # Get the activations
    y = net(img).squeeze(0)
    n = y.size(0)
    for i in range(n):
        # BUGFIX: the original did y.clone().narrow(0, i, 1), copying the
        # whole activation tensor once per channel; a narrow() view is
        # enough because the assignments below copy into y3 anyway. The
        # original's trailing `if i == (n - 1): break` was dead code (the
        # loop ends there regardless) and is removed.
        y3 = torch.Tensor(3, y.size(1), y.size(2))
        y1 = y.narrow(0, i, 1)
        y3[0] = y1
        y3[1] = y1
        y3[2] = y1
        filename = str(
            params.output_dir) + "/" + str(output_filename) + "-" + str(
            params.layer) + "-" + str(i) + file_extension
        deprocess(y3, image_size, filename)
        print("Saving image: " + filename)
def main():
    """Visualize one layer's response as a single grayscale image.

    Runs the input image through the network truncated at ``params.layer``,
    sums the feature maps over the channel dimension, rescales to [0, 255],
    and saves the result via ``deprocess``.
    """
    # Build the model definition and setup pooling layers:
    cnn, layerList = loadCaffemodel(params.model_file, params.pooling, -1)
    img = preprocess(params.input_image, params.image_size).float()

    # Walk the pretrained layers, copying each conv/relu/pool into a fresh
    # Sequential and stopping once the requested layer has been appended.
    cnn = copy.deepcopy(cnn)
    net = nn.Sequential()
    counts = {'C': 0, 'R': 0, 'P': 0}
    names = {'C': None, 'R': None, 'P': None}
    for layer in list(cnn):
        if isinstance(layer, nn.Conv2d):
            kind = 'C'
        elif isinstance(layer, nn.ReLU):
            kind = 'R'
        elif isinstance(layer, (nn.MaxPool2d, nn.AvgPool2d)):
            kind = 'P'
        else:
            kind = None
        if kind is not None:
            net.add_module(str(len(net)), layer)
            names[kind] = layerList[kind][counts[kind]]
            counts[kind] += 1
        if params.layer in (names['C'], names['R'], names['P']):
            break

    # Collapse the feature maps to one map and rescale to [0, 255].
    fmaps = net(img)
    summed = torch.sum(fmaps, 1)
    peak = summed.max()  # captured BEFORE the in-place multiply, on purpose
    summed = summed.mul_(255).div_(peak)

    # Replicate the single map into three identical channels for saving.
    rgb = torch.Tensor(3, summed.size(1), summed.size(2))
    gray = summed[0]
    for band in range(3):
        rgb[band] = gray.data
    print("Saving image")
    deprocess(rgb, params.output_image)
def main():
    """Run neural style transfer.

    Builds a loss-instrumented copy of the pretrained network, captures
    content and style targets with forward passes, then optimizes the
    output image (pixels are the parameters) via ``setup_optimizer``.
    Reads all configuration from the module-level ``params``.
    """
    dtype, multidevice, backward_device = setup_gpu()
    cnn, layerList = loadCaffemodel(params.model_file, params.pooling, params.gpu, params.disable_check)

    content_image = preprocess(params.content_image, params.image_size).type(dtype)
    style_image_input = params.style_image.split(',')
    style_image_list, ext = [], [".jpg", ".jpeg", ".png", ".tiff"]
    for image in style_image_input:
        if os.path.isdir(image):
            # A directory argument expands to every image file inside it.
            images = (image + "/" + file for file in os.listdir(image)
                      if os.path.splitext(file)[1].lower() in ext)
            style_image_list.extend(images)
        else:
            style_image_list.append(image)
    style_images_caffe = []
    for image in style_image_list:
        style_size = int(params.image_size * params.style_scale)
        img_caffe = preprocess(image, style_size).type(dtype)
        style_images_caffe.append(img_caffe)

    if params.init_image != None:
        # Init image is resized to match the content image exactly.
        image_size = (content_image.size(2), content_image.size(3))
        init_image = preprocess(params.init_image, image_size).type(dtype)

    # Handle style blending weights for multiple style inputs
    style_blend_weights = []
    if params.style_blend_weights == None:
        # Style blending not specified, so use equal weighting
        for i in style_image_list:
            style_blend_weights.append(1.0)
        for i, blend_weights in enumerate(style_blend_weights):
            style_blend_weights[i] = int(style_blend_weights[i])
    else:
        style_blend_weights = params.style_blend_weights.split(',')
        assert len(style_blend_weights) == len(style_image_list), \
            "-style_blend_weights and -style_images must have the same number of elements!"

    # Normalize the style blending weights so they sum to 1
    style_blend_sum = 0
    for i, blend_weights in enumerate(style_blend_weights):
        style_blend_weights[i] = float(style_blend_weights[i])
        style_blend_sum = float(style_blend_sum) + style_blend_weights[i]
    for i, blend_weights in enumerate(style_blend_weights):
        style_blend_weights[i] = float(style_blend_weights[i]) / float(style_blend_sum)

    content_layers = params.content_layers.split(',')
    style_layers = params.style_layers.split(',')

    # Set up the network, inserting style and content loss modules
    cnn = copy.deepcopy(cnn)
    content_losses, style_losses, tv_losses = [], [], []
    next_content_idx, next_style_idx = 1, 1
    net = nn.Sequential()
    c, r = 0, 0
    if params.tv_weight > 0:
        tv_mod = TVLoss(params.tv_weight).type(dtype)
        net.add_module(str(len(net)), tv_mod)
        tv_losses.append(tv_mod)

    for i, layer in enumerate(list(cnn), 1):
        # Stop copying layers once every requested loss layer is in place.
        if next_content_idx <= len(content_layers) or next_style_idx <= len(style_layers):
            if isinstance(layer, nn.Conv2d):
                net.add_module(str(len(net)), layer)
                if layerList['C'][c] in content_layers:
                    print("Setting up content layer " + str(i) + ": " + str(layerList['C'][c]))
                    loss_module = ContentLoss(params.content_weight)
                    net.add_module(str(len(net)), loss_module)
                    content_losses.append(loss_module)
                if layerList['C'][c] in style_layers:
                    print("Setting up style layer " + str(i) + ": " + str(layerList['C'][c]))
                    loss_module = StyleLoss(params.style_weight)
                    net.add_module(str(len(net)), loss_module)
                    style_losses.append(loss_module)
                c+=1
            if isinstance(layer, nn.ReLU):
                net.add_module(str(len(net)), layer)
                if layerList['R'][r] in content_layers:
                    print("Setting up content layer " + str(i) + ": " + str(layerList['R'][r]))
                    loss_module = ContentLoss(params.content_weight)
                    net.add_module(str(len(net)), loss_module)
                    content_losses.append(loss_module)
                    # NOTE(review): only ReLU matches advance these indices,
                    # so conv-named layers never satisfy the stop condition
                    # above — confirm this is intentional.
                    next_content_idx += 1
                if layerList['R'][r] in style_layers:
                    print("Setting up style layer " + str(i) + ": " + str(layerList['R'][r]))
                    loss_module = StyleLoss(params.style_weight)
                    net.add_module(str(len(net)), loss_module)
                    style_losses.append(loss_module)
                    next_style_idx += 1
                r+=1
            if isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
                net.add_module(str(len(net)), layer)

    if multidevice:
        net = setup_multi_device(net)

    # Capture content targets
    for i in content_losses:
        i.mode = 'capture'
    print("Capturing content targets")
    print_torch(net, multidevice)
    net(content_image)

    # Capture style targets
    # NOTE(review): mode is set to the *string* 'None', not the None object —
    # the loss modules presumably compare against this string; confirm there.
    for i in content_losses:
        i.mode = 'None'
    for i, image in enumerate(style_images_caffe):
        print("Capturing style target " + str(i+1))
        for j in style_losses:
            j.mode = 'capture'
            j.blend_weight = style_blend_weights[i]
        net(style_images_caffe[i])

    # Set all loss modules to loss mode
    for i in content_losses:
        i.mode = 'loss'
    for i in style_losses:
        i.mode = 'loss'

    # Maybe normalize content and style weights
    if params.normalize_weights:
        normalize_weights(content_losses, style_losses)

    # Freeze the network in order to prevent
    # unnecessary gradient calculations
    for param in net.parameters():
        param.requires_grad = False

    # Initialize the image
    if params.seed >= 0:
        torch.manual_seed(params.seed)
        torch.cuda.manual_seed_all(params.seed)
        torch.backends.cudnn.deterministic=True
    if params.init == 'random':
        B, C, H, W = content_image.size()
        img = torch.randn(C, H, W).mul(0.001).unsqueeze(0).type(dtype)
    elif params.init == 'image':
        if params.init_image != None:
            img = init_image.clone()
        else:
            img = content_image.clone()
    # The image itself is the only trainable parameter.
    img = nn.Parameter(img)

    def maybe_print(t, loss):
        # Print per-module losses every params.print_iter iterations.
        if params.print_iter > 0 and t % params.print_iter == 0:
            print("Iteration " + str(t) + " / "+ str(params.num_iterations))
            for i, loss_module in enumerate(content_losses):
                print(" Content " + str(i+1) + " loss: " + str(loss_module.loss.item()))
            for i, loss_module in enumerate(style_losses):
                print(" Style " + str(i+1) + " loss: " + str(loss_module.loss.item()))
            print(" Total loss: " + str(loss.item()))

    def maybe_save(t):
        # Save every params.save_iter iterations and always on the last one;
        # intermediate saves get an "_<iteration>" filename suffix.
        should_save = params.save_iter > 0 and t % params.save_iter == 0
        should_save = should_save or t == params.num_iterations
        if should_save:
            output_filename, file_extension = os.path.splitext(params.output_image)
            if t == params.num_iterations:
                filename = output_filename + str(file_extension)
            else:
                filename = str(output_filename) + "_" + str(t) + str(file_extension)
            disp = deprocess(img.clone())

            # Maybe perform postprocessing for color-independent style transfer
            if params.original_colors == 1:
                disp = original_colors(deprocess(content_image.clone()), disp)

            disp.save(str(filename))

    # Function to evaluate loss and gradient. We run the net forward and
    # backward to get the gradient, and sum up losses from the loss modules.
    # optim.lbfgs internally handles iteration and calls this function many
    # times, so we manually count the number of iterations to handle printing
    # and saving intermediate results.
    num_calls = [0]
    def feval():
        num_calls[0] += 1
        optimizer.zero_grad()
        net(img)
        loss = 0

        for mod in content_losses:
            loss += mod.loss.to(backward_device)
        for mod in style_losses:
            loss += mod.loss.to(backward_device)
        if params.tv_weight > 0:
            for mod in tv_losses:
                loss += mod.loss.to(backward_device)

        loss.backward()

        maybe_save(num_calls[0])
        maybe_print(num_calls[0], loss)
        return loss

    optimizer, loopVal = setup_optimizer(img)
    while num_calls[0] <= loopVal:
        optimizer.step(feval)
def __init__(self, params, dtype, multidevice, backward_device, verbose=True):
    """Build the style network with loss modules inserted after the
    requested layers.

    Loads the pretrained model via ``loadCaffemodel``, copies its layers
    into ``self.net`` and inserts ContentLoss / MaskedStyleLoss /
    MaskedHistLoss modules after the layers named in
    ``params.content_layers`` / ``params.style_layers`` /
    ``params.hist_layers``. All pooling layers are replaced by a shared
    blurred max-pool. The network's parameters are frozen, and multi-GPU
    splitting is configured when ``multidevice`` is set.

    :param params: configuration namespace (layer names, model file, gpu, …)
    :param dtype: tensor type used for loss modules
    :param multidevice: whether to split the net across devices
    :param backward_device: device used for the backward pass
    :param verbose: when True, log setup progress
    """
    super(StyleNet, self).__init__()
    self.params = params
    self.content_masks_orig = None
    self.style_masks_orig = None
    self.dtype, self.multidevice, self.backward_device = dtype, multidevice, backward_device
    self.content_losses, self.style_losses, self.hist_losses, self.tv_losses = [], [], [], []
    self.verbose = verbose
    # Weights start from the module-level defaults; presumably adjusted
    # later through other methods — TODO confirm.
    self.tv_weight = default_tv_weight
    self.content_weight = default_content_weight
    self.style_weight = default_style_weight
    self.hist_weight = default_hist_weight
    self.style_stat = default_style_stat
    self.normalize_gradients = default_normalize_gradients
    self.save_parameters()
    # Lazily create the single shared blurred max-pool layer used in place
    # of every pooling layer below.
    global maxpool2d_blurred_layer
    if maxpool2d_blurred_layer is None:
        maxpool2d_blurred_layer = MaxPool2d(kernel_size=2, stride=2)
    content_layers = params.content_layers.split(',')
    style_layers = params.style_layers.split(',')
    hist_layers = params.hist_layers.split(',')
    next_content_idx, next_style_idx, next_hist_idx, c, r = 1, 1, 1, 0, 0
    cnn, layerList = loadCaffemodel(params.model_file, params.pooling,
                                    params.gpu, params.disable_check,
                                    self.verbose)
    net = nn.Sequential()
    if self.tv_weight > 0:
        tv_mod = TVLoss(self.tv_weight).type(self.dtype)
        net.add_module(str(len(net)), tv_mod)
        self.tv_losses.append(tv_mod)
    for i, layer in enumerate(list(cnn), 1):
        # Stop copying layers once every requested loss layer is in place.
        if next_content_idx <= len(
                content_layers) or next_style_idx <= len(
                style_layers) or next_hist_idx <= len(hist_layers):
            if isinstance(layer, nn.Conv2d):
                net.add_module(str(len(net)), layer)
                if layerList['C'][c] in content_layers:
                    log(
                        "Setting up content layer " + str(i) + ": " +
                        str(layerList['C'][c]), self.verbose)
                    loss_module = ContentLoss(self.content_weight,
                                              self.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    self.content_losses.append(loss_module)
                if layerList['C'][c] in style_layers:
                    log(
                        "Setting up style layer " + str(i) + ": " +
                        str(layerList['C'][c]), self.verbose)
                    loss_module = MaskedStyleLoss(self.style_weight,
                                                  self.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    self.style_losses.append(loss_module)
                c += 1
            if isinstance(layer, nn.ReLU):
                net.add_module(str(len(net)), layer)
                if layerList['R'][r] in content_layers:
                    log(
                        "Setting up content layer " + str(i) + ": " +
                        str(layerList['R'][r]), self.verbose)
                    loss_module = ContentLoss(self.content_weight,
                                              self.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    self.content_losses.append(loss_module)
                    # NOTE(review): only ReLU matches advance these indices,
                    # so conv-named layers never satisfy the stop condition
                    # above — confirm this is intentional.
                    next_content_idx += 1
                if layerList['R'][r] in style_layers:
                    log(
                        "Setting up style layer " + str(i) + ": " +
                        str(layerList['R'][r]), self.verbose)
                    loss_module = MaskedStyleLoss(self.style_weight,
                                                  self.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    self.style_losses.append(loss_module)
                    next_style_idx += 1
                if layerList['R'][r] in hist_layers:
                    log(
                        "Setting up histogram layer " + str(i) + ": " +
                        str(layerList['R'][r]), self.verbose)
                    loss_module = MaskedHistLoss(self.hist_weight,
                                                 self.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    self.hist_losses.append(loss_module)
                    next_hist_idx += 1
                r += 1
            if isinstance(layer, nn.MaxPool2d) or isinstance(
                    layer, nn.AvgPool2d):
                # Every pooling layer is replaced by the shared blurred
                # max-pool instead of being copied verbatim.
                #net.add_module(str(len(net)), layer)
                net.add_module(str(len(net)), maxpool2d_blurred_layer)
    self.net = net
    log(self.net, self.verbose)

    # Freeze the network to prevent unnecessary gradient calculations
    for param in self.net.parameters():
        param.requires_grad = False

    # Setup multidevice
    if self.multidevice:
        self.__setup_multi_device(params.gpu, params.multidevice_strategy)
    log('Model setup successfully with parameters:\n%s' % params, True)