def evaluate():
    """Stylize the configured content image with the configured style image.

    Command-line options are parsed only to locate the configuration file
    (``args.config_path``); image paths, sizes, the model file and the output
    path are all read from the resulting ``Configs`` object. The result is
    saved to ``cfg.output_img`` and the path is logged at INFO level.
    """
    # Prefer a GPU context whenever MXNet reports at least one GPU.
    device = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu(0)

    options = Options().parse()
    cfg = Configs(options.config_path)

    logging.basicConfig(level=logging.INFO)

    # Inputs: the content image keeps its aspect ratio, the style image is
    # additionally passed through preprocess_batch.
    content = tensor_load_rgbimage(cfg.content_image, device,
                                   size=cfg.val_img_size, keep_asp=True)
    style = preprocess_batch(
        tensor_load_rgbimage(cfg.style_image, device, size=cfg.val_style_size))

    # Network and its trained parameters.
    generator = Net(ngf=cfg.ngf)
    generator.collect_params().load(cfg.val_model, ctx=device)

    # The network takes the style image as a second forward argument.
    stylized = generator(content, style)

    tensor_save_bgrimage(stylized[0], cfg.output_img)
    logging.info("Save img to {}".format(cfg.output_img))
def evaluate(args):
    """Stylize one content image using a Gram-matrix target from a style image.

    Args:
        args: options object providing ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``vgg_model_dir``, ``model``,
            ``cuda`` and ``output_image``.
    """
    content = utils.tensor_load_rgbimage(
        args.content_image, size=args.content_size, keep_asp=True).unsqueeze(0)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(
            args.style_image, size=args.style_size).unsqueeze(0))

    # VGG-16 is used only to describe the style image.
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg_weights = os.path.join(args.vgg_model_dir, "vgg16.weight")
    vgg.load_state_dict(torch.load(vgg_weights))

    style_model = HangSNetV1()
    style_model.load_state_dict(torch.load(args.model))

    if args.cuda:
        style_model.cuda()
        vgg.cuda()
        content = content.cuda()
        style = style.cuda()

    style_v = Variable(style, volatile=True)
    # The return value is discarded here.
    # NOTE(review): this only has an effect if the helper subtracts the mean
    # in place -- confirm against utils.
    utils.subtract_imagenet_mean_batch(style_v)
    gram_style = [utils.gram_matrix(feature) for feature in vgg(style_v)]

    content = Variable(utils.preprocess_batch(content))

    # Only the Gram matrix at index 2 is handed to the network as its target.
    style_model.setTarget(Variable(gram_style[2].data, requires_grad=False))

    stylized = style_model(content)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image, args.cuda)
def stylize(args):
    """Run a trained feed-forward network over one image and save the result.

    When ``args.srcnn`` is truthy the image is loaded with
    ``scale=args.upsample`` and an ``SRCNN`` network is used; otherwise the
    image is loaded unscaled and a ``TransformerNet(args.arch)`` is used.

    Args:
        args: options object providing ``srcnn``, ``content_image``,
            ``upsample``, ``arch``, ``model``, ``cuda`` and ``output_image``.
    """
    use_srcnn = args.srcnn

    if use_srcnn:
        image = utils.tensor_load_rgbimage(args.content_image,
                                           scale=args.upsample)
    else:
        image = utils.tensor_load_rgbimage(args.content_image)

    # In-place: adds the leading batch dimension.
    image.unsqueeze_(0)
    if args.cuda:
        image = image.cuda()
    batch = Variable(utils.preprocess_batch(image), volatile=True)

    network = SRCNN() if use_srcnn else TransformerNet(args.arch)
    network.load_state_dict(torch.load(args.model))
    if args.cuda:
        network.cuda()

    result = network(batch)
    utils.tensor_save_bgrimage(result.data[0], args.output_image, args.cuda)
def evaluate(args):
    """Stylize one content image with one style image and save the output.

    Args:
        args: options object providing ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``ngf``, ``model``, ``cuda`` and
            ``output_image``.
    """
    content = utils.tensor_load_rgbimage(
        args.content_image, size=args.content_size, keep_asp=True).unsqueeze(0)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(
            args.style_image, size=args.style_size).unsqueeze(0))

    network = Net(ngf=args.ngf)
    # Second positional argument False: non-strict state-dict loading.
    network.load_state_dict(torch.load(args.model), False)

    if args.cuda:
        network.cuda()
        content = content.cuda()
        style = style.cuda()

    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    network.setTarget(style_target)
    stylized = network(content)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image, args.cuda)
def fast_evaluate(args, basedir, contents, idx=0):
    """Stylize every content image with every style in ``args.style_folder``.

    Args:
        args: options object providing ``ngf``, ``model``, ``cuda``,
            ``style_folder``, ``style_size`` and ``content_size``.
        basedir: directory the output PNG files are written into.
        contents: iterable of content image paths.
        idx: counter value before the first image; the first image is
            numbered ``idx + 1``.

    Each output is saved as ``<image number>_<style number>.png`` (both
    1-based relative to their start) and its path is printed.
    """
    network = Net(ngf=args.ngf)
    network.load_state_dict(torch.load(args.model), False)
    network.eval()
    if args.cuda:
        network.cuda()

    styles = StyleLoader(args.style_folder, args.style_size, cuda=args.cuda)

    for image_no, content_path in enumerate(contents, start=idx + 1):
        batch = utils.tensor_load_rgbimage(
            content_path, size=args.content_size, keep_asp=True).unsqueeze(0)
        if args.cuda:
            batch = batch.cuda()
        batch = Variable(utils.preprocess_batch(batch))

        for style_no in range(styles.size()):
            network.setTarget(Variable(styles.get(style_no).data))
            stylized = network(batch)
            filename = os.path.join(
                basedir, "{}_{}.png".format(image_no, style_no + 1))
            utils.tensor_save_bgrimage(stylized.data[0], filename, args.cuda)
            print(filename)
def evaluate(my_content_image, my_content_size, my_style_image, my_style_size,
             my_ngf, my_cuda, my_output_image, my_model):
    """Stylize one image, save it, and also return the converted result.

    Args:
        my_content_image: path of the content image.
        my_content_size: size passed to the content image loader.
        my_style_image: path of the style image.
        my_style_size: size passed to the style image loader.
        my_ngf: ``ngf`` argument for ``Net``.
        my_cuda: truthy to run on the GPU.
        my_output_image: path the stylized image is saved to.
        my_model: path of the saved state dict.

    Returns:
        Whatever ``utils.tensor_return_bgrimage`` produces for the first
        output image.
    """
    content = utils.tensor_load_rgbimage(
        my_content_image, size=my_content_size, keep_asp=True).unsqueeze(0)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(
            my_style_image, size=my_style_size).unsqueeze(0))

    network = Net(ngf=my_ngf)

    # Drop running-statistics entries from the checkpoint before loading.
    weights = torch.load(my_model)
    stale_keys = [name for name in weights
                  if name.endswith(('running_mean', 'running_var'))]
    for name in stale_keys:
        del weights[name]
    network.load_state_dict(weights, False)

    if my_cuda:
        network.cuda()
        content = content.cuda()
        style = style.cuda()

    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    network.setTarget(style_target)
    stylized = network(content)

    first = stylized.data[0]
    utils.tensor_save_bgrimage(first, my_output_image, my_cuda)
    return utils.tensor_return_bgrimage(stylized.data[0], my_cuda)
def main():
    """Stylize one content image with every style in ``args.style_folder``.

    Options are parsed from the command line. For style number ``i``
    (0-based) the result is written to ``out/output<i+1>.jpg``; the index and
    the file path are printed as each image is produced.
    """
    import os  # local import so the directory fix below is self-contained

    args = Options().parse()

    style_model = Net(ngf=args.ngf)

    # Drop running-statistics entries from the checkpoint before loading.
    model_dict = torch.load(args.model)
    for key in list(model_dict):
        if key.endswith(('running_mean', 'running_var')):
            del model_dict[key]
    style_model.load_state_dict(model_dict, False)

    style_loaders = utils.StyleLoader(args.style_folder, args.style_size)

    # NOTE(review): the content image is loaded with args.style_size, not a
    # content-specific size -- confirm this is intended.
    content_image = utils.tensor_load_rgbimage(args.content_image,
                                               size=args.style_size,
                                               keep_asp=True)
    content_image = content_image.unsqueeze(0)

    if args.cuda:
        style_model.cuda()
        content_image = content_image.cuda()
    content_image = Variable(utils.preprocess_batch(content_image))

    # The output directory is hard-coded; make sure it exists before the
    # first save instead of failing on a fresh checkout.
    os.makedirs("out", exist_ok=True)

    for i in range(style_loaders.size()):
        print(i)
        style_v = style_loaders.get(i)
        style_model.setTarget(style_v)
        output = style_model(content_image)
        filepath = "out/output" + str(i + 1) + '.jpg'
        print(filepath)
        utils.tensor_save_bgrimage(output.data[0], filepath, args.cuda)
def optimize(args):
    """Optimization-based style transfer on a single image.

    Reference: Gatys et al., "Image Style Transfer Using Convolutional
    Neural Networks", CVPR 2016.

    The output image starts as the (mean-subtracted) content image and is
    updated directly with Adam for ``args.iters`` steps.

    Args:
        args: options object providing ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``vgg_model_dir``, ``cuda``,
            ``lr``, ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    def _prepare(path, **load_kwargs):
        # Load -> add batch dim -> preprocess -> subtract ImageNet mean.
        image = utils.tensor_load_rgbimage(path, **load_kwargs).unsqueeze(0)
        image = Variable(utils.preprocess_batch(image), requires_grad=False)
        return utils.subtract_imagenet_mean_batch(image)

    content = _prepare(args.content_image,
                       size=args.content_size, keep_asp=True)
    style = _prepare(args.style_image, size=args.style_size)

    # Pre-trained VGG-16 feature extractor.
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))

    if args.cuda:
        content = content.cuda()
        style = style.cuda()
        vgg.cuda()

    # Fixed targets: content features at index 1, Gram matrices of all
    # style feature maps.
    content_target = Variable(vgg(content)[1].data, requires_grad=False)
    style_grams = [utils.gram_matrix(feature) for feature in vgg(style)]

    # The image itself is the only optimized tensor.
    output = Variable(content.data, requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()

    for step in range(args.iters):
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        features = vgg(output)
        content_loss = args.content_weight * mse_loss(features[1],
                                                      content_target)

        style_loss = 0.
        for level, feature in enumerate(features):
            gram_target = Variable(style_grams[level].data,
                                   requires_grad=False)
            style_loss += args.style_weight * mse_loss(
                utils.gram_matrix(feature), gram_target)

        total_loss = content_loss + style_loss
        if (step + 1) % args.log_interval == 0:
            print(total_loss.data.cpu().numpy()[0])

        total_loss.backward()
        optimizer.step()

    # Undo the mean subtraction before writing the image.
    output = utils.add_imagenet_mean_batch(output)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def optimize(args):
    """Optimization-based style transfer on a single image (MXNet/Gluon).

    Reference: Gatys et al., "Image Style Transfer Using Convolutional
    Neural Networks", CVPR 2016.

    Args:
        args: options object providing ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    ctx = mx.gpu(0) if args.cuda else mx.cpu(0)

    # Content and style inputs, each mean-subtracted and preprocessed.
    content = utils.subtract_imagenet_mean_preprocess_batch(
        utils.tensor_load_rgbimage(args.content_image, ctx,
                                   size=args.content_size, keep_asp=True))
    style = utils.subtract_imagenet_mean_preprocess_batch(
        utils.tensor_load_rgbimage(args.style_image, ctx,
                                   size=args.style_size))

    # Pre-trained VGG-16; parameters are read from the 'models' directory.
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)

    content_target = vgg(content)[1]
    style_grams = [net.gram_matrix(feature) for feature in vgg(style)]

    # The optimized image is a Gluon Parameter initialised from the content.
    output = Parameter('output', shape=content.shape)
    output.initialize(ctx=ctx)
    output.set_data(content)

    trainer = gluon.Trainer([output], 'adam', {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for step in range(args.iters):
        utils.imagenet_clamp_batch(output.data(), 0, 255)
        with autograd.record():
            features = vgg(output.data())
            content_loss = 2 * args.content_weight * mse_loss(
                features[1], content_target)
            style_loss = 0.
            for level, feature in enumerate(features):
                style_loss = style_loss + 2 * args.style_weight * mse_loss(
                    net.gram_matrix(feature), style_grams[level])
            total_loss = content_loss + style_loss
            total_loss.backward()
        trainer.step(1)

        if (step + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # Undo the mean subtraction before writing the image.
    result = utils.add_imagenet_mean_batch(output.data())
    utils.tensor_save_bgrimage(result[0], args.output_image, args.cuda)
def stylize(args):
    """Apply a fully serialized style model to one image and save the result.

    Args:
        args: options object providing ``content_image``, ``content_scale``,
            ``model`` and ``output_image``.
    """
    image = utils.tensor_load_rgbimage(args.content_image,
                                       scale=args.content_scale)
    batch = Variable(utils.preprocess_batch(image.unsqueeze(0)))

    # The whole model object (not just a state dict) is unpickled here, so
    # args.model must come from a trusted source.
    network = torch.load(args.model)

    stylized = network(batch)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image)
def stylize(args):
    """Stylize one image with a TransformerNet, or export the net to ONNX.

    If ``args.model`` ends with ``.onnx`` the work is delegated to
    ``stylize_onnx_caffe2`` and its result is returned. Otherwise the
    checkpoint is loaded into a ``TransformerNet``; with ``args.export_onnx``
    set the network is exported to that path, else it is run on the image.
    In both cases the resulting tensor is written to ``args.output_image``.

    Args:
        args: options object providing ``model``, ``content_image``,
            ``content_scale``, ``cuda``, ``half``, ``export_onnx`` and
            ``output_image``.

    Returns:
        The return value of ``stylize_onnx_caffe2(args)`` for ``.onnx``
        models, otherwise ``None``.

    Raises:
        AssertionError: if ``args.export_onnx`` is set but does not end with
            ``.onnx``.
    """
    if args.model.endswith(".onnx"):
        return stylize_onnx_caffe2(args)

    content_image = utils.tensor_load_rgbimage(args.content_image,
                                               scale=args.content_scale)
    content_image = content_image.unsqueeze(0)
    if args.cuda:
        content_image = content_image.cuda()
    content_image = Variable(utils.preprocess_batch(content_image),
                             requires_grad=False)

    style_model = TransformerNet()
    state_dict = torch.load(args.model)

    # Older checkpoints store normalisation parameters as "<layer>.scale" /
    # "<layer>.shift"; rename them to "<layer>.weight" / "<layer>.bias".
    # Keys that are absent are skipped so that checkpoints already using the
    # new names load instead of raising KeyError; anything genuinely missing
    # is still reported by load_state_dict below.
    norm_layers = (
        ["in1", "in2", "in3"]
        + ["res{}.in{}".format(block, idx)
           for block in range(1, 6) for idx in (1, 2)]
        + ["in4", "in5"]
    )
    for layer in norm_layers:
        for old_suffix, new_suffix in (("scale", "weight"), ("shift", "bias")):
            old_key = "{}.{}".format(layer, old_suffix)
            if old_key in state_dict:
                new_key = "{}.{}".format(layer, new_suffix)
                state_dict[new_key] = state_dict.pop(old_key)

    style_model.load_state_dict(state_dict)

    if args.cuda:
        style_model.cuda()

    if args.half:
        style_model.half()
        content_image = content_image.half()

    if args.export_onnx:
        assert args.export_onnx.endswith(
            ".onnx"), "Export model file should end with .onnx"
        # The export call's return value is saved as the output image below.
        output = torch.onnx._export(style_model, content_image,
                                    args.export_onnx)
    else:
        output = style_model(content_image)

    if args.half:
        output = output.float()

    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def optimize(args):
    """Iteratively optimize an image to match content and style targets.

    Reference: Gatys et al., "Image Style Transfer Using Convolutional
    Neural Networks", CVPR 2016. MXNet/Gluon implementation.

    Args:
        args: options object providing ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    if args.cuda:
        device = mx.gpu(0)
    else:
        device = mx.cpu(0)

    # --- targets -----------------------------------------------------------
    content_batch = utils.tensor_load_rgbimage(
        args.content_image, device, size=args.content_size, keep_asp=True)
    content_batch = utils.subtract_imagenet_mean_preprocess_batch(
        content_batch)

    style_batch = utils.tensor_load_rgbimage(
        args.style_image, device, size=args.style_size)
    style_batch = utils.subtract_imagenet_mean_preprocess_batch(style_batch)

    # --- feature extractor ---------------------------------------------------
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=device)

    content_feature = vgg(content_batch)[1]
    style_features = vgg(style_batch)
    target_grams = [net.gram_matrix(feat) for feat in style_features]

    # --- optimized image ----------------------------------------------------
    canvas = Parameter('output', shape=content_batch.shape)
    canvas.initialize(ctx=device)
    canvas.set_data(content_batch)

    trainer = gluon.Trainer([canvas], 'adam', {'learning_rate': args.lr})
    l2 = gluon.loss.L2Loss()

    for iteration in range(args.iters):
        # Keep pixel values inside the valid range before each step.
        utils.imagenet_clamp_batch(canvas.data(), 0, 255)

        with autograd.record():
            current = vgg(canvas.data())
            content_loss = 2 * args.content_weight * l2(current[1],
                                                        content_feature)
            style_loss = 0.
            for m, feat in enumerate(current):
                gram_now = net.gram_matrix(feat)
                style_loss = style_loss + 2 * args.style_weight * l2(
                    gram_now, target_grams[m])
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)

        if (iteration + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # --- save -------------------------------------------------------------
    final = utils.add_imagenet_mean_batch(canvas.data())
    utils.tensor_save_bgrimage(final[0], args.output_image, args.cuda)
def stylize(args):
    """Stylize one image with a trained ``TransformerNet`` and save it.

    Args:
        args: options object providing ``content_image``, ``content_scale``,
            ``model``, ``cuda`` and ``output_image``.
    """
    on_gpu = args.cuda

    batch = utils.tensor_load_rgbimage(
        args.content_image, scale=args.content_scale).unsqueeze(0)
    if on_gpu:
        batch = batch.cuda()
    batch = Variable(utils.preprocess_batch(batch))

    network = TransformerNet()
    network.load_state_dict(torch.load(args.model))
    if args.cuda:
        network.cuda()

    stylized = network(batch)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image, args.cuda)
def evaluate(args):
    """Stylize one content image with one style image (MXNet/Gluon).

    Args:
        args: options object providing ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``ngf``,
            ``model`` and ``output_image``.
    """
    device = mx.gpu(0) if args.cuda else mx.cpu(0)

    # Inputs: only the style image goes through preprocess_batch here.
    content = utils.tensor_load_rgbimage(
        args.content_image, device, size=args.content_size, keep_asp=True)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(
            args.style_image, device, size=args.style_size))

    # Network and trained parameters.
    generator = net.Net(ngf=args.ngf)
    generator.collect_params().load(args.model, ctx=device)

    # Forward pass with the style image as the target.
    generator.setTarget(style)
    stylized = generator(content)

    utils.tensor_save_bgrimage(stylized[0], args.output_image, args.cuda)
def evaluate(model_dir, c_img, s_img, img_size, out_img):
    """Stylize ``c_img`` with ``s_img`` and write the result to ``out_img``.

    Relies on the module-level names ``device`` and ``FILTER_CHANNEL``.

    Args:
        model_dir: path handed to ``torch.load`` to read the state dict.
        c_img: path of the content image.
        s_img: path of the style image.
        img_size: size used when loading both images.
        out_img: path the stylized image is saved to.
    """
    content = utils.tensor_load_rgbimage(c_img, size=img_size, keep_asp=True)
    content = content.unsqueeze(0).to(device)

    style = utils.tensor_load_rgbimage(s_img, size=img_size).unsqueeze(0)
    style = utils.preprocess_batch(style).to(device)

    network = Net(ngf=FILTER_CHANNEL, dv=device).to(device)
    # Second positional argument False: non-strict state-dict loading.
    network.load_state_dict(torch.load(model_dir), False)

    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    network.setTarget(style_target)
    stylized = network(content)

    utils.tensor_save_bgrimage(stylized.data[0], out_img)
    print('Done')
def evaluate(args):
    """Stylize one image and save it next to the content image.

    The output path is derived here and stored back on
    ``args.output_image``: same directory as the content image, file name
    ``<style name up to its first dot>_<content file name>``.

    Args:
        args: options object providing ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``model``, ``ngf`` and ``cuda``.
            Its ``output_image`` attribute is overwritten.
    """
    # Build the output path from the two input paths.
    target_dir = os.path.dirname(args.content_image)
    style_stem = os.path.basename(args.style_image).split('.')[0]
    target_name = style_stem + '_' + os.path.basename(args.content_image)
    args.output_image = os.path.join(target_dir, target_name)

    content = utils.tensor_load_rgbimage(
        args.content_image, size=args.content_size, keep_asp=True).unsqueeze(0)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(
            args.style_image, size=args.style_size).unsqueeze(0))

    # Drop running-statistics entries from the checkpoint before loading.
    weights = torch.load(args.model)
    for name in [key for key in weights
                 if key.endswith(('running_mean', 'running_var'))]:
        del weights[name]

    network = Net(ngf=args.ngf)
    network.load_state_dict(weights, False)

    if args.cuda:
        network.cuda()
        content = content.cuda()
        style = style.cuda()

    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    network.setTarget(style_target)
    stylized = network(content)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image, args.cuda)
def fast_evaluate(args, basedir, contents, idx=0):
    """Render every content image in every available style.

    Args:
        args: options object providing ``model``, ``cuda``, ``style_folder``,
            ``style_size`` and ``content_size``.
        basedir: directory the output PNG files are written into.
        contents: iterable of content image paths.
        idx: counter value before the first image; the first image is
            numbered ``idx + 1``.

    The network is always built with ``ngf=128``. Each result is saved as
    ``<image number>_<style number>.png`` and its path is printed.
    """
    # Strip running-statistics entries so the checkpoint loads cleanly.
    checkpoint = torch.load(args.model)
    unwanted = [key for key in checkpoint
                if key.endswith(('running_mean', 'running_var'))]
    for key in unwanted:
        del checkpoint[key]

    style_model = Net(ngf=128)
    style_model.load_state_dict(checkpoint, False)
    style_model.eval()
    if args.cuda:
        style_model.cuda()

    style_loader = StyleLoader(args.style_folder, args.style_size,
                               cuda=args.cuda)

    counter = idx
    for path in contents:
        counter += 1

        image = utils.tensor_load_rgbimage(path, size=args.content_size,
                                           keep_asp=True)
        image = image.unsqueeze(0)
        if args.cuda:
            image = image.cuda()
        image = Variable(utils.preprocess_batch(image))

        for style_index in range(style_loader.size()):
            target = Variable(style_loader.get(style_index).data)
            style_model.setTarget(target)
            rendered = style_model(image)

            out_name = "{}_{}.png".format(counter, style_index + 1)
            filename = os.path.join(basedir, out_name)
            utils.tensor_save_bgrimage(rendered.data[0], filename, args.cuda)
            print(filename)
def evaluate(args):
    """Stylize one content image with one style image and save the output.

    Args:
        args: options object providing ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``ngf``, ``model``, ``cuda`` and
            ``output_image``.
    """
    content = utils.tensor_load_rgbimage(args.content_image,
                                         size=args.content_size,
                                         keep_asp=True)
    content = content.unsqueeze(0)

    style = utils.tensor_load_rgbimage(args.style_image, size=args.style_size)
    style = utils.preprocess_batch(style.unsqueeze(0))

    # The checkpoint is loaded as-is (non-strict).
    # NOTE: for PyTorch 0.4 the original author's notes describe an
    # alternative: load the state dict, delete every key ending in
    # 'running_mean' / 'running_var', and load the filtered dict into
    # Net(ngf=128). See
    # https://github.com/zhanghang1989/PyTorch-Multi-Style-Transfer/issues/21
    network = Net(ngf=args.ngf)
    network.load_state_dict(torch.load(args.model), False)

    if args.cuda:
        network.cuda()
        content = content.cuda()
        style = style.cuda()

    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    network.setTarget(style_target)
    stylized = network(content)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image, args.cuda)
def evaluate(args):
    """Stylize one content image using a checkpoint stripped of running stats.

    The network is always built with ``ngf=128``.

    Args:
        args: options object providing ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``model``, ``cuda`` and
            ``output_image``.
    """
    content = utils.tensor_load_rgbimage(
        args.content_image, size=args.content_size, keep_asp=True).unsqueeze(0)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(
            args.style_image, size=args.style_size).unsqueeze(0))

    # Remove running-statistics entries; collect the names first because a
    # dict cannot be mutated while it is being iterated.
    checkpoint = torch.load(args.model)
    running_stat_keys = [key for key in checkpoint
                         if key.endswith(('running_mean', 'running_var'))]
    for key in running_stat_keys:
        del checkpoint[key]

    network = Net(ngf=128)
    network.load_state_dict(checkpoint, False)

    if args.cuda:
        network.cuda()
        content = content.cuda()
        style = style.cuda()

    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    network.setTarget(style_target)
    stylized = network(content)
    utils.tensor_save_bgrimage(stylized.data[0], args.output_image, args.cuda)
def evaluate(args):
    """Stylize one image, then export and visualise the network graph.

    After saving the stylized image, the network is traced with a symbolic
    input and the following are produced in the working directory: the
    symbol as ``MODEL.json``, the parameters as ``MODEL.params``, a rendered
    PDF plot of the graph, and a printed layer summary.

    Args:
        args: options object providing ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``ngf``,
            ``model`` and ``output_image``.
    """
    ctx = mx.gpu(0) if args.cuda else mx.cpu(0)

    # images
    content_image = utils.tensor_load_rgbimage(args.content_image, ctx,
                                               size=args.content_size,
                                               keep_asp=True)
    style_image = utils.tensor_load_rgbimage(args.style_image, ctx,
                                             size=args.style_size)
    style_image = utils.preprocess_batch(style_image)

    # model
    # NOTE(review): "width" is taken from axis 2 and "height" from axis 3 of
    # the content tensor -- confirm this matches the tensor layout that
    # net.Net expects.
    width = content_image.shape[2]
    height = content_image.shape[3]
    style_model = net.Net(ngf=args.ngf, width=width, height=height)
    style_model.collect_params().load(args.model, ctx=ctx)

    # forward
    style_model.setTarget(style_image)
    output = style_model(content_image)
    utils.tensor_save_bgrimage(output[0], args.output_image, args.cuda)

    # Export and visualise the model. The unused 'width'/'height' symbols
    # and the unused JSON string of the original were dropped.
    data = mx.sym.var('data')
    graph_symbol = style_model(data)
    graph_symbol.save("MODEL.json")
    style_model.save_params("MODEL.params")

    graph = mx.viz.plot_network(graph_symbol, save_format='pdf')
    graph.render()
    mx.visualization.print_summary(graph_symbol,
                                   {'data': (1, 3, width, height)})
def evaluate(raw_content_image, raw_content_size, style_image, style_size,
             cuda, output_name):
    """Stylize one image with the module-level ``style_model`` and save it.

    Args:
        raw_content_image: path of the content image.
        raw_content_size: size passed to the content image loader.
        style_image: path of the style image.
        style_size: size passed to the style image loader.
        cuda: flag forwarded to ``utils.tensor_save_bgrimage``.
        output_name: path the stylized image is saved to.

    Returns:
        Whatever ``utils.tensor_save_bgrimage`` returns.
    """
    content = utils.tensor_load_rgbimage(
        raw_content_image, size=raw_content_size, keep_asp=True).unsqueeze(0)
    style = utils.preprocess_batch(
        utils.tensor_load_rgbimage(style_image, size=style_size).unsqueeze(0))

    # NOTE(review): the inputs are never moved to the GPU in this function,
    # even when `cuda` is truthy -- confirm the caller's model placement.
    style_target = Variable(style)
    content = Variable(utils.preprocess_batch(content))

    # style_model is not a parameter; it is resolved from the enclosing
    # module at call time.
    style_model.setTarget(style_target)
    stylized = style_model(content)

    return utils.tensor_save_bgrimage(stylized.data[0], output_name, cuda)
def train(args):
    """Fine-tune a transformer network on DAVIS sequences with FDB losses.

    The total loss is the sum of a content loss, a style (Gram) loss, a
    pixel-level frame-difference loss and a feature-level frame-difference
    loss. The network, VGG and inputs are always moved to the GPU here.

    NOTE(review): the source layout for this function was collapsed, so the
    nesting below is reconstructed from syntax. In particular, whether the
    logging/reset tail runs per batch or per epoch, and whether the final
    "Done" message sits inside the epoch loop, could not be determined --
    confirm against the intended behavior.

    Args:
        args: options object providing ``seed``, ``model_type``,
            ``pad_type``, ``dataset``, ``interval``, ``init_model``, ``lr``,
            ``vgg_model``, ``style_image``, ``style_size``,
            ``time_strength1``, ``time_strength2``, ``save_model_dir``,
            ``epochs``, ``batch_size``, ``content_weight`` and
            ``style_weight``.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    kwargs = {'num_workers': 0, 'pin_memory': False}
    # The model type also decides how many frames each sample carries.
    if args.model_type == "rnn":
        transformer = transformer_net.TransformerRNN(args.pad_type)
        seq_size = 4
    else:
        transformer = transformer_net.TransformerNet(args.pad_type)
        seq_size = 2
    train_dataset = dataset.DAVISDataset(args.dataset,
                                         "train",
                                         seq_size=seq_size,
                                         interval=args.interval,
                                         no_flow=True)
    # One sequence per loader batch; the frames of that sequence form the
    # actual network batch (see `x = x[0]` below).
    train_loader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=True,
                              **kwargs)
    model_path = args.init_model
    print("=> Load from model file %s" % model_path)
    transformer.load_state_dict(torch.load(model_path))
    transformer.train()
    # For the RNN variant the first conv layer is replaced AFTER loading the
    # checkpoint, with a freshly initialised 6-input-channel layer.
    if args.model_type == "rnn":
        transformer.conv1 = transformer_net.ConvLayer(6,
                                                      32,
                                                      kernel_size=9,
                                                      stride=1,
                                                      pad_type=args.pad_type)
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    l1_loss = torch.nn.L1Loss()
    vgg = Vgg16()
    # args.vgg_model is used directly as the weight file path here.
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model)))
    vgg.eval()
    transformer.cuda()
    vgg.cuda()
    mse_loss.cuda()
    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    print("=> Pixel FDB loss weight: %f" % args.time_strength1)
    print("=> Feature FDB loss weight: %f" % args.time_strength2)
    style = utils.preprocess_batch(style).cuda()
    # Save the preprocessed style image for reference.
    utils.tensor_save_bgrimage(
        style[0].detach(),
        os.path.join(args.save_model_dir, 'train_style.jpg'), True)
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    # Style targets are fixed: detach them from the graph.
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]
    for e in range(args.epochs):
        agg_content_loss = agg_style_loss = agg_pixelfdb_loss = agg_featurefdb_loss = 0.
        iters = 0
        # `flow`, `occ` and the fourth item are unpacked but not used below.
        for batch_id, (x, flow, occ, _) in enumerate(train_loader):
            # Drop the loader's batch dimension of size 1.
            x = x[0]
            iters += 1
            optimizer.zero_grad()
            x = utils.preprocess_batch(x).cuda()
            y = transformer(x)  # (N, 3, 256, 256)
            # Every 100 batches, dump input/output frames for inspection.
            if (batch_id + 1) % 100 == 0:
                idx = (batch_id + 1) // 100
                # NOTE(review): indexes y/x with range(args.batch_size)
                # although the loader batch size is 1 -- this assumes
                # args.batch_size does not exceed the number of frames;
                # confirm.
                for i in range(args.batch_size):
                    utils.tensor_save_bgrimage(
                        y.data[i],
                        os.path.join(args.save_model_dir,
                                     "out_%02d_%02d.png" % (idx, i)), True)
                    utils.tensor_save_bgrimage(
                        x.data[i],
                        os.path.join(args.save_model_dir,
                                     "in_%02d-%02d.png" % (idx, i)), True)
            #xc = center_crop(x.detach(), y.shape[2], y.shape[3])
            y = utils.subtract_imagenet_mean_batch(y)
            x = utils.subtract_imagenet_mean_batch(x)
            features_y = vgg(y)
            features_xc = vgg(x)
            #content target
            f_xc_c = features_xc[2].detach()
            # content
            f_c = features_y[2]
            content_loss = args.content_weight * mse_loss(f_c, f_xc_c)
            style_loss = 0.
            # Per feature level: average, over frames, of the Gram loss
            # against the single style Gram matrix.
            for m in range(len(features_y)):
                gram_s = gram_style[m]
                gram_y = utils.gram_matrix(features_y[m])
                batch_style_loss = 0
                for n in range(gram_y.shape[0]):
                    batch_style_loss += args.style_weight * mse_loss(
                        gram_y[n], gram_s[0])
                style_loss += batch_style_loss / gram_y.shape[0]
            # FDB
            # Differences between consecutive output frames are matched to
            # differences between consecutive input frames.
            pixel_fdb_loss = args.time_strength1 * mse_loss(
                y[1:] - y[:-1], x[1:] - x[:-1])
            # temporal content: 16th
            feature_fdb_loss = args.time_strength2 * l1_loss(
                features_y[2][1:] - features_y[2][:-1],
                features_xc[2][1:] - features_xc[2][:-1])
            total_loss = content_loss + style_loss + pixel_fdb_loss + feature_fdb_loss
            total_loss.backward()
            optimizer.step()
            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data
            agg_pixelfdb_loss += pixel_fdb_loss.data
            agg_featurefdb_loss += feature_fdb_loss.data
            # NOTE(review): reconstructed as per-batch logging; with the
            # reset below, each message then reports a single batch.
            agg_total = agg_content_loss + agg_style_loss + agg_pixelfdb_loss + agg_featurefdb_loss
            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\tpixel fdb: {:.6f}\tfeature fdb: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), e + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters, agg_style_loss / iters,
                agg_pixelfdb_loss / iters, agg_featurefdb_loss / iters,
                agg_total / iters)
            print(mesg)
            agg_content_loss = agg_style_loss = agg_pixelfdb_loss = agg_featurefdb_loss = 0.0
            iters = 0
        # save model
        # One checkpoint per epoch; the epoch index is part of the name.
        save_model_filename = "epoch_" + str(e) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir,
                                       save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)
        print("\nDone, trained model saved at", save_model_path)
def train(args):
    """Fine-tune a transformer network on DAVIS sequences with an OFB loss.

    The total loss is the sum of a content loss, a style (Gram) loss and a
    pixel-level, flow-warped consistency loss weighted by a confidence mask.
    GPU use is controlled by ``args.cuda`` throughout.

    Changes relative to the previous version:
      * ``transformer.cuda()`` at the start of every epoch is now guarded by
        ``args.cuda``; it used to run unconditionally, which broke CPU runs.
      * The unused locals ``anormaly`` and ``l1_loss`` were removed.

    NOTE(review): the source layout for this function was collapsed, so the
    nesting is reconstructed from syntax. Whether the logging/reset tail runs
    per batch or per epoch, and whether the final "Done" message sits inside
    the epoch loop, could not be determined -- confirm.

    Args:
        args: options object providing ``seed``, ``cuda``, ``model_type``,
            ``pad_type``, ``dataset``, ``flow``, ``init_model``, ``lr``,
            ``vgg_model``, ``style_image``, ``style_size``,
            ``time_strength``, ``save_model_dir``, ``epochs``,
            ``content_weight`` and ``style_weight``.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 0, 'pin_memory': False}
    else:
        kwargs = {}

    # The model type also decides how many frames each sample carries.
    if args.model_type == "rnn":
        transformer = transformer_net.TransformerRNN(args.pad_type)
        seq_size = 4
    else:
        transformer = transformer_net.TransformerNet(args.pad_type)
        seq_size = 2

    train_dataset = dataset.DAVISDataset(args.dataset,
                                         seq_size=seq_size,
                                         use_flow=args.flow)
    # One sequence per loader batch; its frames form the network batch.
    train_loader = DataLoader(train_dataset, batch_size=1, **kwargs)

    # The network is constructed a second time here, exactly as before.
    # This is redundant, but it is kept so that the random-number stream
    # consumed before the conv1 re-initialisation below stays the same.
    if args.model_type == "rnn":
        transformer = transformer_net.TransformerRNN(args.pad_type)
    else:
        transformer = transformer_net.TransformerNet(args.pad_type)

    model_path = args.init_model
    print("=> Load from model file %s" % model_path)
    transformer.load_state_dict(torch.load(model_path))
    transformer.train()

    # For the RNN variant the first conv layer is replaced AFTER loading the
    # checkpoint, with a freshly initialised 6-input-channel layer.
    if args.model_type == "rnn":
        transformer.conv1 = transformer_net.ConvLayer(6,
                                                      32,
                                                      kernel_size=9,
                                                      stride=1,
                                                      pad_type=args.pad_type)

    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model, "vgg16.weight")))
    vgg.eval()

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        mse_loss.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    print("=> Pixel OFB loss weight: %f" % args.time_strength)

    style = utils.preprocess_batch(style)
    if args.cuda:
        style = style.cuda()
    # Save the preprocessed style image for reference.
    utils.tensor_save_bgrimage(
        style[0].detach(),
        os.path.join(args.save_model_dir, 'train_style.jpg'), args.cuda)
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    # Style targets are fixed: detach them from the graph.
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    for e in range(args.epochs):
        train_loader.dataset.reset()
        # The model is switched to eval mode and moved to the CPU for saving
        # at the end of each epoch, so restore training mode (and the GPU
        # placement, when requested) here.
        transformer.train()
        if args.cuda:
            transformer.cuda()

        agg_content_loss = agg_style_loss = agg_pixelofb_loss = 0.
        iters = 0

        for batch_id, (x, flow, conf) in enumerate(train_loader):
            # Drop the loader's batch dimension of size 1.
            x, flow, conf = x[0], flow[0], conf[0]
            iters += 1
            optimizer.zero_grad()

            x = utils.preprocess_batch(x)  # (N, 3, 256, 256)
            if args.cuda:
                x = x.cuda()
                flow = flow.cuda()
                conf = conf.cuda()

            y = transformer(x)  # (N, 3, 256, 256)

            # Crop the input to the output's spatial size before comparing
            # VGG features.
            xc = center_crop(x.detach(), y.size(2), y.size(3))
            vgg_y = utils.subtract_imagenet_mean_batch(y)
            vgg_x = utils.subtract_imagenet_mean_batch(xc)
            features_y = vgg(vgg_y)
            features_xc = vgg(vgg_x)

            # content target / content
            f_xc_c = features_xc[2].detach()
            f_c = features_y[2]
            content_loss = args.content_weight * mse_loss(f_c, f_xc_c)

            # Per feature level: average, over frames, of the Gram loss
            # against the single style Gram matrix.
            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = gram_style[m]
                gram_y = utils.gram_matrix(features_y[m])
                batch_style_loss = 0
                for n in range(gram_y.shape[0]):
                    batch_style_loss += args.style_weight * mse_loss(
                        gram_y[n], gram_s[0])
                style_loss += batch_style_loss / gram_y.shape[0]

            # Temporal term: frames 1.. are warped with the flow and compared
            # with frames ..-1 under a mask scaled by the confidence.
            warped_y, warped_y_mask = warp(y[1:], flow)
            warped_y = warped_y.detach()
            warped_y_mask *= conf
            pixel_ofb_loss = args.time_strength * weighted_mse(
                y[:-1], warped_y, warped_y_mask)

            total_loss = content_loss + style_loss + pixel_ofb_loss
            total_loss.backward()
            optimizer.step()

            # Every 100 batches, dump flow, frames, warped frames and masks.
            if (batch_id + 1) % 100 == 0:
                prefix = args.save_model_dir + "/"
                idx = (batch_id + 1) // 100
                flow_image = flow_to_color(
                    flow[0].detach().cpu().numpy().transpose(1, 2, 0))
                utils.save_image(prefix + "forward_flow_%d.png" % idx,
                                 flow_image)
                warped_x, warped_x_mask = warp(x[1:], flow)
                warped_x = warped_x.detach()
                warped_x_mask *= conf
                for i in range(2):
                    utils.tensor_save_bgrimage(
                        y.data[i], prefix + "out_%d-%d.png" % (idx, i),
                        args.cuda)
                    utils.tensor_save_bgrimage(
                        x.data[i], prefix + "in_%d-%d.png" % (idx, i),
                        args.cuda)
                    # Warped tensors hold one frame fewer than the inputs.
                    if i < warped_y.shape[0]:
                        utils.tensor_save_bgrimage(
                            warped_y.data[i],
                            prefix + "wout_%d-%d.png" % (idx, i), args.cuda)
                        utils.tensor_save_bgrimage(
                            warped_x.data[i],
                            prefix + "win_%d-%d.png" % (idx, i), args.cuda)
                        utils.tensor_save_image(
                            prefix + "conf_%d-%d.png" % (idx, i),
                            warped_x_mask.data[i])

            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data
            agg_pixelofb_loss += pixel_ofb_loss.data

            # NOTE(review): reconstructed as per-batch logging; with the
            # reset below, each message then reports a single batch.
            agg_total = agg_content_loss + agg_style_loss + agg_pixelofb_loss
            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\tpixel ofb: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), e + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters, agg_style_loss / iters,
                agg_pixelofb_loss / iters, agg_total / iters)
            print(mesg)
            agg_content_loss = agg_style_loss = agg_pixelofb_loss = 0.0
            iters = 0

        # save model: one CPU checkpoint per epoch, epoch index in the name.
        transformer.eval()
        transformer.cpu()
        save_model_filename = "epoch_" + str(e) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir,
                                       save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)
        print("\nDone, trained model saved at", save_model_path)
def train(args):
    """Train a ``TransformerNet`` on one style image with a VGG-16 perceptual loss.

    Every full-size batch from ``args.dataset`` is run through the transformer.
    The loss is the sum of

    * a content term: MSE between the third VGG feature map of the output and
      that of the ``center_crop``-ped input, scaled by ``args.content_weight``;
    * a style term: for every VGG feature level, the batch-averaged MSE between
      the output's Gram matrix and the style image's Gram matrix, scaled by
      ``args.style_weight``.

    Both networks are moved to CUDA unconditionally.  Sample input/output
    images are written every 1000 batches, a progress line is printed for
    every processed batch, and a checkpoint is saved into
    ``args.save_model_dir`` at the end of every epoch.

    Args:
        args: Options object.  Attributes read: ``seed``, ``image_size``,
            ``dataset``, ``batch_size``, ``pad_type``, ``lr``,
            ``vgg_model_dir``, ``style_image``, ``style_size``,
            ``save_model_dir``, ``epochs``, ``content_weight`` and
            ``style_weight``.
    """
    # Seed all random number generators used during training.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Input pipeline: square resize, then scale ToTensor's output by 255.
    side = args.image_size
    transform = transforms.Compose([
        transforms.Resize((side, side)),
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.mul(255)),
    ])
    train_dataset = dataset.CustomImageDataset(
        args.dataset, transform=transform, img_size=args.image_size)
    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=0,
        pin_memory=False)

    # Network being trained, its optimizer and the pixel/feature criterion.
    transformer = TransformerNet(args.pad_type).train()
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    # Loss network, loaded from disk and switched to eval mode.
    vgg = Vgg16()
    vgg_weights = os.path.join(args.vgg_model_dir, "vgg16.weight")
    vgg.load_state_dict(torch.load(vgg_weights))
    vgg.eval()

    transformer = transformer.cuda()
    vgg = vgg.cuda()

    # Style targets: Gram matrices of the style image's VGG features.
    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    style = utils.preprocess_batch(style).cuda()
    utils.tensor_save_bgrimage(
        style[0].detach(),
        os.path.join(args.save_model_dir, 'train_style.jpg'),
        True)
    style = utils.subtract_imagenet_mean_batch(style)
    gram_style = [utils.gram_matrix(feat).detach() for feat in vgg(style)]

    for epoch in range(args.epochs):
        train_loader.dataset.reset()
        agg_content_loss = 0.
        agg_style_loss = 0.
        iters = 0

        for batch_id, (batch, _) in enumerate(train_loader):
            if x_is_incomplete := (batch.size(0) != args.batch_size):
                print("=> Skip incomplete batch")
                continue

            iters += 1
            optimizer.zero_grad()
            batch = utils.preprocess_batch(batch).cuda()
            stylized = transformer(batch)

            # Dump one input/output pair every 1000 batches.
            snapshot_idx, remainder = divmod(batch_id + 1, 1000)
            if remainder == 0:
                utils.tensor_save_bgrimage(
                    stylized.data[0],
                    os.path.join(args.save_model_dir,
                                 "out_%d.png" % snapshot_idx),
                    True)
                utils.tensor_save_bgrimage(
                    batch.data[0],
                    os.path.join(args.save_model_dir,
                                 "in_%d.png" % snapshot_idx),
                    True)

            vgg_out = utils.subtract_imagenet_mean_batch(stylized)
            vgg_in = utils.subtract_imagenet_mean_batch(batch)

            features_y = vgg(vgg_out)
            features_x = vgg(
                center_crop(vgg_in, vgg_out.size(2), vgg_out.size(3)))

            # Content term on the third feature level; the target is detached.
            content_target = features_x[2].detach()
            content_loss = args.content_weight * mse_loss(
                features_y[2], content_target)

            # Style term: per-sample Gram MSE, averaged over the batch,
            # summed over all feature levels.
            style_loss = 0.
            for level, feat in enumerate(features_y):
                target_gram = gram_style[level]
                gram_y = utils.gram_matrix(feat)
                level_loss = sum(
                    args.style_weight * mse_loss(sample_gram, target_gram[0])
                    for sample_gram in gram_y)
                style_loss += level_loss / gram_y.shape[0]

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data

            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), epoch + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters,
                agg_style_loss / iters,
                (agg_content_loss + agg_style_loss) / iters)
            print(mesg)

            # Start a fresh running average after each report.
            agg_content_loss = agg_style_loss = 0.0
            iters = 0

        # Checkpoint for this epoch.
        save_model_filename = "epoch_" + str(epoch) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir,
                                       save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)
        print("\nDone, trained model saved at", save_model_path)
# Stylise one content image with a pre-trained `Net` and write the result to
# ./data/result/.  `file` is not defined in this fragment; it has to be
# provided by the surrounding code (presumably a file name ending in ".jpg",
# given the `.replace` below -- confirm against the caller).
content_image = "./data/" + file
content_size = 512
style_image = "images/9styles/udnie.jpg"
style_size = 512
# Load both images and give each a leading dimension via unsqueeze(0).
content_image = utils.tensor_load_rgbimage(content_image,
                                           size=content_size,
                                           keep_asp=True)
content_image = content_image.unsqueeze(0)
style = utils.tensor_load_rgbimage(style_image, size=style_size)
style = style.unsqueeze(0)
style = utils.preprocess_batch(style)
# Build the network and restore its weights from a fixed checkpoint path.
model = Net(ngf=128)
model.load_state_dict(torch.load("./models/Final.model"))
# NOTE(review): `volatile=True` was removed in PyTorch 0.4 and is ignored
# there (a warning is emitted); `torch.no_grad()` is the replacement.
# Confirm which PyTorch version this script targets.
style_v = Variable(style, volatile=True)
content_image = Variable(utils.preprocess_batch(content_image), volatile=True)
# The style image is registered as the model's target before the forward pass.
model.setTarget(style_v)
output = model(content_image)
print(output.shape)
# The last argument (0) is passed where other call sites pass a cuda flag.
utils.tensor_save_bgrimage(
    output.data[0], "./data/result/" + file.replace(".jpg", "_result.jpg"), 0)
''' with torch.onnx, people could transfer dynamic model into static, like: dynamic getting image size into just loading static images with fixed size '''
# torch.onnx.export(model, inputs, "Net.proto", verbose=True)
def optimize(args):
    """Optimise an image so its Sobel response follows a synthetic vortex field.

    A 2-D vector field is built by superposing vortices placed on a lattice
    whose two basis vectors are 60 degrees apart.  Starting from Gaussian
    noise, an image is then optimised with Adam to minimise

    * ``args.content_weight * CosineLoss(field, Sobel(gray_bgr_batch(image)))``
    * ``args.style_weight * sum(MSE(gram(vgg(image)), gram(vgg(style))))``

    and the result is written to ``args.output_image``.

    The image is created on the GPU only when ``args.cuda`` is set, matching
    the way every other tensor and module in this function is placed.

    Args:
        args: Options object.  Attributes read: ``style_image``,
            ``style_size``, ``content_size``, ``vgg_model_dir``, ``cuda``,
            ``lr``, ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    device = torch.device("cuda" if args.cuda else "cpu")

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # Generate the vector field that the optimisation is driven towards.
    size = args.content_size
    vectors = np.zeros((size, size, 2), dtype=np.float32)

    vortex_spacing = 0.5
    extra_factor = 2.

    # Lattice basis: unit x-direction and a vector rotated by 60 degrees.
    a = np.array([1, 0]) * vortex_spacing
    b = np.array([np.cos(np.pi / 3), np.sin(np.pi / 3)]) * vortex_spacing
    rnv = int(2 * extra_factor / vortex_spacing)
    vortices = [
        n * a + m * b for n in range(-rnv, rnv) for m in range(-rnv, rnv)
    ]
    # Keep only vortices inside the (-extra_factor, extra_factor) square.
    vortices = [(x, y) for (x, y) in vortices
                if -extra_factor < x < extra_factor
                and -extra_factor < y < extra_factor]

    # Sample coordinates in [-1, 1]; xs varies along columns, ys along rows.
    xs = np.linspace(-1, 1, size).astype(np.float32)[None, :]
    ys = np.linspace(-1, 1, size).astype(np.float32)[:, None]

    for (x, y) in vortices:
        rsq = (xs - x)**2 + (ys - y)**2
        # A sample that coincides with a vortex centre has rsq == 0 and both
        # numerators equal to 0; without this guard 0/0 would put NaN into
        # the field.  With it that vortex simply contributes 0 there.
        rsq[rsq == 0] = 1.0
        vectors[..., 0] += (ys - y) / rsq
        vectors[..., 1] += -(xs - x) / rsq

    vectors = utils.tensor_load_vector_field(vectors)

    # Load the pre-trained VGG-16 and extract the style features.
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    features_style = vgg(style_image)
    # Detach once here: the style targets are constants of the optimisation.
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    # Load the Sobel network.
    sobel = Sobel()
    if args.cuda:
        vectors = vectors.cuda()
        sobel.cuda()

    # The optimised image has the field's shape with 3 entries on dim 1.
    output_dims = list(vectors.size())
    output_dims[1] = 3
    output_size = torch.Size(output_dims)
    output = Variable(torch.randn(output_size, device=device) * 30,
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    cosine_loss = CosineLoss()
    mse_loss = torch.nn.MSELoss()

    # Optimise the image.
    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        sobel_input = utils.gray_bgr_batch(output)
        sobel_y = sobel(sobel_input)
        content_loss = args.content_weight * cosine_loss(vectors, sobel_y)

        features_y = vgg(output)
        style_loss = 0
        for m in range(len(features_y)):
            gram_y = utils.gram_matrix(features_y[m])
            style_loss += args.style_weight * mse_loss(gram_y, gram_style[m])

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()

        if ((e + 1) % args.log_interval == 0):
            # Losses are reported with their weights divided back out.
            print("iter: %d content_loss: %f style_loss %f" %
                  (e, content_loss.item() / args.content_weight,
                   style_loss.item() / args.style_weight))
        tbar.set_description(str(total_loss.item()))

    # Save the image.
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def optimize(args):
    """Optimise an image so that one transformer's output on it matches another's.

    ``transformer_phi1`` is applied once to the (grayscale-loaded) content
    image to produce a fixed target ``vectors``.  Starting from Gaussian
    noise, an image is then optimised with Adam to minimise

    * ``args.content_weight * CosineEmbeddingLoss(vectors,
      transformer_phi2(gray_bgr_batch(image)), label)``
    * ``args.style_weight * sum(MSE(gram(vgg(image)), gram(vgg(style))))``

    and the result is written to ``args.output_image``.

    The image and the label tensor are created on the GPU only when
    ``args.cuda`` is set, matching the placement of every other tensor and
    module in this function.

    Args:
        args: Options object.  Attributes read: ``content_image``,
            ``content_size``, ``style_image``, ``style_size``,
            ``vgg_model_dir``, ``transformer_model_phi1_path``,
            ``transformer_model_phi2_path``, ``cuda``, ``lr``, ``iters``,
            ``content_weight``, ``style_weight``, ``log_interval`` and
            ``output_image``.
    """
    device = torch.device("cuda" if args.cuda else "cpu")

    content_image = utils.tensor_load_grayimage(args.content_image,
                                                size=args.content_size)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, requires_grad=False)
    content_image = utils.subtract_imagenet_mean_batch_gray(content_image)

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # Load the pre-trained VGG-16 and extract the style features.
    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    features_style = vgg(style_image)
    # Detach once here: the style targets are constants of the optimisation.
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    # The content target is whatever transformer_phi1 produces for the
    # content image; it is detached so no gradient reaches that network.
    transformer_phi1 = TransformerNet()
    transformer_phi1.load_state_dict(
        torch.load(args.transformer_model_phi1_path))
    if args.cuda:
        content_image = content_image.cuda()
        transformer_phi1.cuda()
    vectors = transformer_phi1(content_image).detach()

    # The optimised image has the content image's shape with 3 entries on
    # dim 1.
    output_dims = list(content_image.size())
    output_dims[1] = 3
    output_size = torch.Size(output_dims)
    output = Variable(torch.randn(output_size, device=device),
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()
    cosine_loss = torch.nn.CosineEmbeddingLoss()

    # NOTE(review): the label shape is hard-coded to (1, 128, 128, 128) and
    # is independent of args.content_size.  Confirm it matches what
    # CosineEmbeddingLoss expects for the transformer outputs in use.
    label = torch.ones(1, 128, 128, 128, device=device)

    # Network applied to the image being optimised.
    transformer_phi2 = TransformerNet()
    transformer_phi2.load_state_dict(
        torch.load(args.transformer_model_phi2_path))
    if args.cuda:
        transformer_phi2.cuda()

    # Optimise the image.
    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        transformer_input = utils.gray_bgr_batch(output)
        transformer_y = transformer_phi2(transformer_input)
        content_loss = args.content_weight * cosine_loss(
            vectors, transformer_y, label)

        features_y = vgg(output)
        style_loss = 0
        for m in range(len(features_y)):
            gram_y = utils.gram_matrix(features_y[m])
            style_loss += args.style_weight * mse_loss(gram_y, gram_style[m])

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()

        tbar.set_description(str(total_loss.item()))
        if ((e + 1) % args.log_interval == 0):
            print("iter: %d content_loss: %f style_loss %f" %
                  (e, content_loss.item(), style_loss.item()))

    # Save the image.
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def optimize(args):
    """Optimise an image against a line-integral-convolution (LIC) loss.

    A rotational vector field centred on the middle of a
    ``content_size x content_size`` grid is built, and a random
    ``(3, size, size)`` image is optimised with Adam to minimise
    ``args.content_weight * lic.line_integral_convolution(field, image,
    kernel, args.cuda)``, where ``kernel`` is a 15-tap half-sine window.
    The current image is written to ``output_iter_<n>.jpg`` every
    ``args.log_interval`` iterations.

    The image is created on the GPU only when ``args.cuda`` is set, matching
    the placement of the style image and VGG in this function.

    NOTE: the style image is loaded and its VGG Gram matrices are computed,
    but no style term is added to the loss in this function, so
    ``gram_style`` and ``mse_loss`` are currently unused.

    Args:
        args: Options object.  Attributes read: ``style_image``,
            ``style_size``, ``content_size``, ``vgg_model_dir``, ``cuda``,
            ``lr``, ``iters``, ``content_weight`` and ``log_interval``.
    """
    device = torch.device("cuda" if args.cuda else "cpu")

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)

    # Generate the vector field that we want to stylise.  For a grid point
    # (row y, column x) with offsets xx = x - size/2 and yy = y - size/2:
    #   component 0 = -yy / (xx^2 + yy^2), or eps when yy == 0
    #   component 1 =  xx / (xx^2 + yy^2), or eps when xx == 0
    # and the exact centre (xx == yy == 0) is set to (-1, 1).
    size = args.content_size
    eps = 1e-7
    offsets = np.arange(size, dtype=np.float64) - size / 2
    xx = offsets[np.newaxis, :]
    yy = offsets[:, np.newaxis]
    rsq = xx**2 + yy**2
    centre = rsq == 0
    # Avoid 0/0 at the centre; that entry is overwritten below anyway.
    safe_rsq = np.where(centre, 1.0, rsq)
    vectors = np.zeros((size, size, 2), dtype=np.float32)
    vectors[..., 0] = np.where(yy != 0, -yy / safe_rsq, eps)
    vectors[..., 1] = np.where(xx != 0, xx / safe_rsq, eps)
    vectors[centre, 0] = -1
    vectors[centre, 1] = 1

    # Load the pre-trained VGG-16 and extract the style features.
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    features_style = vgg(style_image)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    output_size = torch.Size([3, size, size])
    output = Variable(torch.randn(output_size, device=device) * 80 + 127,
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()

    # The LIC kernel does not depend on the iteration, so build it once.
    kernellen = 15
    kernel = np.sin(np.arange(kernellen) * np.pi / kernellen)
    kernel = kernel.astype(np.float32)

    tbar = trange(args.iters)
    for e in tbar:
        utils.clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        # Only the current loss is kept; holding every iteration's loss
        # tensor would grow memory for the whole run.
        loss = args.content_weight * lic.line_integral_convolution(
            vectors, output, kernel, args.cuda)
        loss.backward()
        optimizer.step()
        tbar.set_description(str(loss.item()))

        # Save the intermediate image.
        if ((e + 1) % args.log_interval == 0):
            utils.tensor_save_bgrimage(output.data,
                                       "output_iter_" + str(e + 1) + ".jpg",
                                       args.cuda)