Ejemplo n.º 1
0
def optimize(args):
    """Stylize one image by optimizing its pixels directly (Gatys et al.).

    Reference: "Image Style Transfer Using Convolutional Neural Networks".

    The optimized image starts as a copy of the preprocessed content image
    and is updated with Adam. The objective is the sum of a content term
    (VGG feature at index 1 compared with the content image's) and a style
    term (Gram matrices of every VGG feature level compared with the style
    image's). The final image is written to ``args.output_image``.

    Args:
        args: Option namespace. Reads ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    device = mx.gpu(0) if args.cuda else mx.cpu(0)

    # Load both target images onto the device and run the project's
    # ImageNet-mean preprocessing on each.
    content = utils.subtract_imagenet_mean_preprocess_batch(
        utils.tensor_load_rgbimage(args.content_image,
                                   device,
                                   size=args.content_size,
                                   keep_asp=True))
    style = utils.subtract_imagenet_mean_preprocess_batch(
        utils.tensor_load_rgbimage(args.style_image,
                                   device,
                                   size=args.style_size))

    # Pre-trained VGG-16 used as a fixed feature extractor.
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=device)

    # Fixed targets: one content feature map and one Gram matrix per level.
    content_target = vgg(content)[1]
    style_grams = [net.gram_matrix(feat) for feat in vgg(style)]

    # The image being optimized is held in a Parameter so that a Gluon
    # Trainer can update it; it is seeded with the content image.
    canvas = Parameter('output', shape=content.shape)
    canvas.initialize(ctx=device)
    canvas.set_data(content)

    trainer = gluon.Trainer([canvas], 'adam', {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    # gluon's L2Loss includes a 1/2 factor, hence the 2x in both scales.
    content_scale = 2 * args.content_weight
    style_scale = 2 * args.style_weight

    for step in range(args.iters):
        # Clamp the current image in place before each forward pass.
        utils.imagenet_clamp_batch(canvas.data(), 0, 255)
        with autograd.record():
            feats = vgg(canvas.data())
            content_loss = content_scale * mse_loss(feats[1], content_target)
            style_loss = 0.
            for feat, target_gram in zip(feats, style_grams):
                style_loss = style_loss + style_scale * mse_loss(
                    net.gram_matrix(feat), target_gram)
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)
        if (step + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # Undo the mean subtraction and write the first (only) batch entry.
    result = utils.add_imagenet_mean_batch(canvas.data())
    utils.tensor_save_bgrimage(result[0], args.output_image, args.cuda)
Ejemplo n.º 2
0
def optimize(args):
    """Stylize one image by optimizing its pixels directly (Gatys et al.).

    Reference: "Image Style Transfer Using Convolutional Neural Networks".

    The optimized image starts as a copy of the preprocessed content image
    and is updated with Adam. The objective is a weighted MSE between VGG
    feature index 1 of the image and of the content image, plus a weighted
    MSE between the Gram matrices of every VGG feature level of the image
    and of the style image. The result is written to ``args.output_image``.

    Args:
        args: Option namespace. Reads ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``vgg_model_dir``, ``cuda``,
            ``lr``, ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """

    def _prepare(path, **load_opts):
        # Shared pipeline for both targets: load, add a batch axis, run the
        # project's batch preprocessing, then subtract the ImageNet mean.
        image = utils.tensor_load_rgbimage(path, **load_opts).unsqueeze(0)
        batch = Variable(utils.preprocess_batch(image), requires_grad=False)
        return utils.subtract_imagenet_mean_batch(batch)

    content_image = _prepare(args.content_image,
                             size=args.content_size,
                             keep_asp=True)
    style_image = _prepare(args.style_image, size=args.style_size)

    # Pre-trained VGG-16 used as a fixed feature extractor.
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    weight_path = os.path.join(args.vgg_model_dir, "vgg16.weight")
    vgg.load_state_dict(torch.load(weight_path))

    if args.cuda:
        content_image = content_image.cuda()
        style_image = style_image.cuda()
        vgg.cuda()

    # Fixed targets, detached from the graph that produced them.
    content_target = Variable(vgg(content_image)[1].data, requires_grad=False)
    style_grams = [utils.gram_matrix(feat) for feat in vgg(style_image)]

    # The optimized image is a fresh leaf seeded with the content image.
    output = Variable(content_image.data, requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()

    for step in range(args.iters):
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()
        feats = vgg(output)
        content_loss = args.content_weight * mse_loss(feats[1], content_target)

        style_loss = 0.
        for feat, style_gram in zip(feats, style_grams):
            target = Variable(style_gram.data, requires_grad=False)
            style_loss += args.style_weight * mse_loss(
                utils.gram_matrix(feat), target)

        total_loss = content_loss + style_loss

        # The loss is reported before the backward pass, on logging steps only.
        if (step + 1) % args.log_interval == 0:
            print(total_loss.data.cpu().numpy()[0])
        total_loss.backward()

        optimizer.step()

    # Add the ImageNet mean back and write the first (only) batch entry.
    result = utils.add_imagenet_mean_batch(output)
    utils.tensor_save_bgrimage(result.data[0], args.output_image, args.cuda)
Ejemplo n.º 3
0
def optimize(args):
    """Run Gatys-style image optimization with an MXNet VGG-16.

    Reference: "Image Style Transfer Using Convolutional Neural Networks".

    A Gluon ``Parameter`` holding the image is initialized from the
    preprocessed content image and optimized with Adam against a content
    loss (VGG feature index 1) plus a style loss (Gram matrices of all VGG
    feature levels). The optimized image is saved to ``args.output_image``.

    Args:
        args: Option namespace. Reads ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    ctx = mx.gpu(0) if args.cuda else mx.cpu(0)

    # Content and style targets, loaded on ``ctx`` and preprocessed.
    raw_content = utils.tensor_load_rgbimage(
        args.content_image, ctx, size=args.content_size, keep_asp=True)
    content_batch = utils.subtract_imagenet_mean_preprocess_batch(raw_content)
    raw_style = utils.tensor_load_rgbimage(
        args.style_image, ctx, size=args.style_size)
    style_batch = utils.subtract_imagenet_mean_preprocess_batch(raw_style)

    # Pre-trained VGG-16 feature extractor.
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)

    # Reference activations that stay fixed during optimization.
    target_content = vgg(content_batch)[1]
    target_grams = [net.gram_matrix(level) for level in vgg(style_batch)]

    # Trainable image, seeded with the content batch.
    image_param = Parameter('output', shape=content_batch.shape)
    image_param.initialize(ctx=ctx)
    image_param.set_data(content_batch)

    trainer = gluon.Trainer([image_param], 'adam', {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for iteration in range(args.iters):
        # Clamp the image in place before it is fed to the network.
        utils.imagenet_clamp_batch(image_param.data(), 0, 255)
        with autograd.record():
            levels = vgg(image_param.data())
            # gluon's L2Loss includes a 1/2 factor; the 2x cancels it.
            content_loss = 2 * args.content_weight * mse_loss(
                levels[1], target_content)
            style_loss = 0.
            for level, gram_s in zip(levels, target_grams):
                gram_y = net.gram_matrix(level)
                style_loss = style_loss + 2 * args.style_weight * mse_loss(
                    gram_y, gram_s)
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)
        if (iteration + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # Restore the ImageNet mean and save the first (only) batch entry.
    final_image = utils.add_imagenet_mean_batch(image_param.data())
    utils.tensor_save_bgrimage(final_image[0], args.output_image, args.cuda)
Ejemplo n.º 4
0
def optimize(args):
    """Optimize an image against a transformer-derived target and a style.

    The optimized image is a random 3-channel tensor with the spatial size
    of the content batch. Each iteration minimizes:

    * a content term: cosine embedding loss between ``vectors`` (the output
      of ``transformer_phi1`` on the grayscale content image, detached) and
      the output of ``transformer_phi2`` on the current image after
      ``utils.gray_bgr_batch``;
    * a style term: MSE between the Gram matrices of every VGG feature
      level of the current image and of the style image.

    The result is written to ``args.output_image``.

    Args:
        args: Option namespace. Reads ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``vgg_model_dir``, ``cuda``,
            ``transformer_model_phi1_path``,
            ``transformer_model_phi2_path``, ``lr``, ``iters``,
            ``content_weight``, ``style_weight``, ``log_interval`` and
            ``output_image``.
    """
    # Every tensor created here follows args.cuda. The optimized image used
    # to be pinned to "cuda", which broke (or mixed devices) on CPU runs.
    device = "cuda" if args.cuda else "cpu"

    content_image = utils.tensor_load_grayimage(args.content_image,
                                                size=args.content_size)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, requires_grad=False)
    content_image = utils.subtract_imagenet_mean_batch_gray(content_image)

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # Load the pre-trained VGG-16 and extract the style Gram matrices.
    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    gram_style = [utils.gram_matrix(y) for y in vgg(style_image)]

    # Content target. Earlier revisions built `vectors` from a synthetic
    # vortex field or an HDF5 file; it now comes from transformer_phi1.
    transformer_phi1 = TransformerNet()
    transformer_phi1.load_state_dict(
        torch.load(args.transformer_model_phi1_path))
    if args.cuda:
        content_image = content_image.cuda()
        transformer_phi1.cuda()
    vectors = transformer_phi1(content_image)
    # Detach so that no gradient flows back into transformer_phi1.
    vectors = Variable(vectors.data, requires_grad=False)

    # Optimized image: content batch shape with the channel axis set to 3.
    output_shape = list(content_image.size())
    output_shape[1] = 3
    output = Variable(torch.randn(output_shape, device=device),
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()
    cosine_loss = torch.nn.CosineEmbeddingLoss()
    # NOTE(review): the label shape is hard-coded and does not follow
    # args.content_size -- confirm it matches what CosineEmbeddingLoss
    # expects for the transformer outputs before using other sizes.
    label = torch.ones(1, 128, 128, 128, device=device)

    transformer_phi2 = TransformerNet()
    transformer_phi2.load_state_dict(
        torch.load(args.transformer_model_phi2_path))
    if args.cuda:
        transformer_phi2.cuda()

    # Optimize the image.
    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        transformer_y = transformer_phi2(utils.gray_bgr_batch(output))
        content_loss = args.content_weight * cosine_loss(
            vectors, transformer_y, label)

        style_loss = 0
        for feature, gram_target in zip(vgg(output), gram_style):
            gram_y = utils.gram_matrix(feature)
            gram_s = Variable(gram_target.data, requires_grad=False)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()

        tbar.set_description(str(total_loss.item()))
        if (e + 1) % args.log_interval == 0:
            print("iter: %d content_loss: %f style_loss %f" %
                  (e, content_loss.item(), style_loss.item()))

    # Save the image.
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
Ejemplo n.º 5
0
def _build_vortex_field(size, vortex_spacing=0.5, extra_factor=2.):
    """Return a ``(size, size, 2)`` float32 field summed over a vortex lattice.

    Vortex centres are the points ``n * a + m * b`` of a lattice with basis
    vectors ``a`` and ``b`` (60 degrees apart, length ``vortex_spacing``)
    that fall strictly inside ``(-extra_factor, extra_factor)`` on both
    axes. The field is sampled on a ``size`` x ``size`` grid over
    ``[-1, 1]`` x ``[-1, 1]``; each vortex at ``(x, y)`` contributes
    ``((ys - y) / rsq, -(xs - x) / rsq)``.
    """
    field = np.zeros((size, size, 2), dtype=np.float32)

    a = np.array([1, 0]) * vortex_spacing
    b = np.array([np.cos(np.pi / 3), np.sin(np.pi / 3)]) * vortex_spacing
    rnv = int(2 * extra_factor / vortex_spacing)
    lattice = [
        n * a + m * b for n in range(-rnv, rnv) for m in range(-rnv, rnv)
    ]
    vortices = [(x, y) for (x, y) in lattice
                if -extra_factor < x < extra_factor
                and -extra_factor < y < extra_factor]

    xs = np.linspace(-1, 1, size).astype(np.float32)[None, :]
    ys = np.linspace(-1, 1, size).astype(np.float32)[:, None]

    for (x, y) in vortices:
        # NOTE(review): rsq is 0 wherever a grid sample coincides with a
        # vortex centre (an odd `size` can put a sample at the origin,
        # which is a lattice point); the division then yields inf/nan.
        rsq = (xs - x)**2 + (ys - y)**2
        field[..., 0] += (ys - y) / rsq
        field[..., 1] += -(xs - x) / rsq
    return field


def optimize(args):
    """Optimize an image whose Sobel response follows a vortex field.

    The optimized image is a random 3-channel tensor (scaled by 30) with
    the spatial size of the target vector field. Each iteration minimizes:

    * a content term: ``CosineLoss`` between the synthetic vortex field and
      the output of the ``Sobel`` module on the current image after
      ``utils.gray_bgr_batch``;
    * a style term: MSE between the Gram matrices of every VGG feature
      level of the current image and of the style image.

    The result is written to ``args.output_image``.

    Args:
        args: Option namespace. Reads ``style_image``, ``style_size``,
            ``content_size``, ``vgg_model_dir``, ``cuda``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    # The optimized image follows args.cuda. It used to be pinned to
    # "cuda", which broke (or mixed devices) when args.cuda was false.
    device = "cuda" if args.cuda else "cpu"

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # Generate the vector field that the optimization works back from.
    vectors = utils.tensor_load_vector_field(
        _build_vortex_field(args.content_size))

    # Load the pre-trained VGG-16 and extract the style Gram matrices.
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    gram_style = [utils.gram_matrix(y) for y in vgg(style_image)]

    # Load the Sobel network.
    sobel = Sobel()
    if args.cuda:
        vectors = vectors.cuda()
        sobel.cuda()

    # Optimized image: vector-field shape with the channel axis set to 3.
    output_shape = list(vectors.data.size())
    output_shape[1] = 3
    output = Variable(torch.randn(output_shape, device=device) * 30,
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    cosine_loss = CosineLoss()
    mse_loss = torch.nn.MSELoss()

    # Optimize the image.
    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        sobel_y = sobel(utils.gray_bgr_batch(output))
        content_loss = args.content_weight * cosine_loss(vectors, sobel_y)

        style_loss = 0
        for feature, gram_target in zip(vgg(output), gram_style):
            gram_y = utils.gram_matrix(feature)
            gram_s = Variable(gram_target.data, requires_grad=False)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()

        if (e + 1) % args.log_interval == 0:
            # Losses are reported with their weights divided back out.
            print("iter: %d content_loss: %f style_loss %f" %
                  (e, content_loss.item() / args.content_weight,
                   style_loss.item() / args.style_weight))
        tbar.set_description(str(total_loss.item()))

    # Save the image.
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)