Example #1
    def __init__(self, img_h=cfg.img_h, img_w=cfg.img_w):
        self.use_sigmoid = False
        self.is_training = tf.placeholder_with_default(True,
                                                       shape=[],
                                                       name='is_training')

        self.x_image = tf.placeholder(tf.float32,
                                      shape=[cfg.batch_size, img_h, img_w, 3])
        self.y_image = tf.placeholder(tf.float32,
                                      shape=[cfg.batch_size, img_h, img_w, 3])
        self.fake_x = tf.placeholder(tf.float32,
                                     shape=[cfg.batch_size, img_h, img_w, 3])
        self.fake_y = tf.placeholder(tf.float32,
                                     shape=[cfg.batch_size, img_h, img_w, 3])

        # NOTE: `network` (the path to the VGG weights file) is not a parameter
        # of this constructor and must be defined in the enclosing module.
        self.vgg_weights, self.vgg_mean_pixel = vgg.load_net(network)

        self.G = Generator('G', self.is_training)
        self.D_Y = Discriminator('D_Y',
                                 self.is_training,
                                 norm=cfg.norm,
                                 use_sigmoid=self.use_sigmoid)
        self.F = Generator('F', self.is_training)
        self.D_X = Discriminator('D_X',
                                 self.is_training,
                                 norm=cfg.norm,
                                 use_sigmoid=self.use_sigmoid)
def main(argv):
    parser = build_parser()
    options = parser.parse_args(args=argv)

    content = options.content
    style = options.style
    scale = options.scale
    output_filename = options.output
    create_if_needed(output_filename)

    neural_style_home = os.getenv('NEURAL_STYLE_HOME')
    if neural_style_home is None:
        print('NEURAL_STYLE_HOME is not set')
        return

    vgg_weights, vgg_mean_pixel = vgg.load_net(
        os.path.join(neural_style_home, 'imagenet-vgg-verydeep-19.mat'))

    argv = [
        '--content', content, '--styles', style, '--output', output_filename,
        '--style-scales', scale
    ]

    neural_style.main(argv, vgg_weights, vgg_mean_pixel)
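
# A minimal, hypothetical entry point for `main` above. The flag names are
# taken from the options the function reads (`--content`, `--style`,
# `--scale`, `--output`); the script name is illustrative.
if __name__ == '__main__':
    import sys
    # e.g. python run_style.py --content cat.jpg --style wave.jpg \
    #                          --scale 1.0 --output out/cat_wave.jpg
    main(sys.argv[1:])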
def styleloss(network, image1, image2, layer_weight_exp, pooling):
    """
    Calculate style similarity utilizing style (gram) matrix.
    This function returns "style loss", which indicates how dissimilar two input images are.
    """
    image1_shape = (1, ) + image1.shape  # (1, height, width, number)
    image2_shape = (1, ) + image2.shape
    image1_features = {}
    image2_features = {}

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    layers_weights = {}
    for layer in LAYERS:
        layers_weights[layer] = layer_weight
        layer_weight *= layer_weight_exp

    # normalize layer weights
    layer_weights_sum = 0
    for layer in LAYERS:
        layer_weights_sum += layers_weights[layer]
    for layer in LAYERS:
        layers_weights[layer] /= layer_weights_sum

    # compute image1 features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=image1_shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        image1_pre = np.array([vgg.preprocess(image1, vgg_mean_pixel)])
        for layer in LAYERS:
            features = net[layer].eval(feed_dict={image: image1_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features)
            image1_features[layer] = gram

    # compute image2 features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=image2_shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        image2_pre = np.array([vgg.preprocess(image2, vgg_mean_pixel)])
        for layer in LAYERS:
            features = net[layer].eval(feed_dict={image: image2_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features)
            image2_features[layer] = gram

    # calculate style loss from the Gram matrices (assumes both images have the
    # same shape, since layer sizes are read from the last network built)
    with tf.Graph().as_default():
        style_loss = 0
        style_losses = []
        for layer in LAYERS:
            temp_layer = net[layer]
            _, height, width, number = map(lambda i: i.value,
                                           temp_layer.get_shape())
            size = height * width * number
            image1_gram = image1_features[layer]
            image2_gram = image2_features[layer]
            style_losses.append(layers_weights[layer] *
                                tf.nn.l2_loss(image1_gram - image2_gram) /
                                size**2)
        style_losses = reduce(tf.add, style_losses)  # functools.reduce under Python 3
        with tf.Session() as sess:
            style_loss = style_losses.eval()

        return style_loss
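
# A hedged usage sketch for `styleloss`. It assumes both images are loaded as
# (H, W, 3) float32 arrays of the *same* size, since the loss graph reads the
# layer sizes from a single network; the file names are illustrative.
import numpy as np
from PIL import Image

def _load_rgb(path):
    # load an image as a (H, W, 3) float32 array
    return np.asarray(Image.open(path).convert('RGB'), dtype=np.float32)

img_a = _load_rgb('a.jpg')
img_b = _load_rgb('b.jpg')
loss = styleloss('imagenet-vgg-verydeep-19.mat', img_a, img_b,
                 layer_weight_exp=1.0, pooling='max')
print('style loss: %g' % loss)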
Example #4
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
            content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
            learning_rate, beta1, beta2, epsilon, pooling, exp_sigma, mat_sigma, mat_rho, text_to_print,
            print_iterations=None, checkpoint_iterations=None, kernel=3, d=2, gamma_rho=1, gamma=1, rational_rho=1, alpha=1):

    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]

    kernel codes:
    0 - dot product kernel
    1 - exponential kernel
    2 - Matern kernel
    3 - polynomial kernel
    4 - gamma-exponential kernel
    5 - rational quadratic kernel
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))

                # sqr = features.T*features.T
                # dim = features.shape

                if(kernel == 0):
                    gram2 = np.matmul(features.T, features) / features.size
                elif(kernel == 1):
                    gram2 = gramSquaredExp_np(features, exp_sigma) / features.size  # exponential kernel
                elif(kernel == 2):
                    # NOTE: `v` (the Matern smoothness, e.g. 0.5, 1.5 or 2.5) is not
                    # a parameter of this function and must be defined elsewhere.
                    gram2 = gramMatten_np(features, mat_sigma, v, mat_rho) / features.size  # Matern kernel
                elif(kernel == 3):
                    gram2 = gramPoly_np(features, C=0, d=d) / features.size  # polynomial kernel
                elif(kernel == 4):
                    gram2 = gramGammaExp_np(features, gamma_rho, gamma) / features.size  # gamma-exponential kernel
                elif(kernel == 5):
                    gram2 = gramRatioanlQuad_np(features, rational_rho, alpha) / features.size  # rational quadratic kernel

                style_features[i][layer] = gram2

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    g = tf.Graph()
    with g.as_default(), g.device('/gpu'):
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                net[content_layer] - content_features[content_layer]) /
                content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))

                style_gram = style_features[i][style_layer]

                dim = feats.get_shape()
                # print(dim)

                sqr = tf.reduce_sum(tf.transpose(feats) * tf.transpose(feats), axis=1)

                if(kernel == 0):
                    gram = (tf.matmul(tf.transpose(feats), feats)) / size
                elif(kernel == 1):
                    gram = tf.exp(-1 * (tf.transpose(tf.ones([dim[1], dim[1]]) * sqr) + tf.ones([dim[1], dim[1]]) * sqr - 2 *
                                        tf.matmul(tf.transpose(feats), feats)) / 2 / (exp_sigma * exp_sigma)) / size  # exponential kernel
                elif(kernel == 2):
                    # Matern kernel (`v` is the smoothness; see the note in the numpy branch above)
                    d2 = tf.nn.relu(tf.transpose(tf.ones([dim[1], dim[1]]) * sqr) + tf.ones([dim[1], dim[1]]) * sqr - 2 * tf.matmul(tf.transpose(feats), feats))
                    if(v == 0.5):
                        gram = mat_sigma**2 * tf.exp(-1 * tf.sqrt(d2) / mat_rho) / size
                    elif(v == 1.5):
                        gram = mat_sigma**2 * (tf.ones([dim[1], dim[1]]) + tf.sqrt(3.0) * tf.sqrt(d2) / mat_rho) * tf.exp(-1 * tf.sqrt(3.0) * tf.sqrt(d2) / mat_rho) / size
                    elif(v == 2.5):
                        gram = mat_sigma**2 * (tf.ones([dim[1], dim[1]]) + tf.sqrt(5.0) * tf.sqrt(d2) / mat_rho + 5 * d2 / 3 / (mat_rho**2)) * tf.exp(-1 * tf.sqrt(5.0) * tf.sqrt(d2) / mat_rho) / size
                elif(kernel == 3):
                    # polynomial kernel
                    gram = (tf.matmul(tf.transpose(feats), feats))**d / size
                elif(kernel == 4):
                    # gamma-exponential kernel
                    gram = tf.exp(-1 * (tf.sqrt(d2) / gamma_rho)**gamma) / size
                elif(kernel == 5):
                    # rational quadratic kernel
                    gram = (1 + (d2 / rational_rho**2 / 2 / alpha))**(-1 * alpha) / size

                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)

            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])

        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) /
             tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]) /
             tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        # train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress(last_loss):
            new_loss = loss.eval()
            stderr.write('file ===>  %s \n' % text_to_print)
            stderr.write('  content loss: %1.3e \t' % content_loss.eval())
            stderr.write('    style loss: %1.3e \t' % style_loss.eval())
            stderr.write('       tv loss: %1.3e \t' % tv_loss.eval())
            stderr.write('    total loss: %1.3e \t' % new_loss)
            stderr.write('    loss difference: %1.3e \t\n' % (last_loss - new_loss))
            return new_loss

        def save_progress():
            # `losses` avoids shadowing the built-in `dict`
            losses = {"content loss": content_loss.eval(), "style loss": style_loss.eval(), "tv loss": tv_loss.eval(), "total loss": loss.eval()}
            return losses

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            new_loss = 0
            # if (print_iterations and print_iterations != 0):
            #     print_progress()
            for i in range(iterations):
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    new_loss = print_progress(new_loss)

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    loss_vals = save_progress()
                    this_loss = loss.eval()
                    print(this_loss, "loss at this checkpoint")
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    try:
                        img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)
                    except Exception:
                        print("unable to reconstruct the result image with the given parameters")
                        img_out = None

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

                    yield (
                        (None if last_step else i),
                        img_out, loss_vals
                    )
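
# The kernel branches above all rely on the identity
# ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 <x_i, x_j>.
# Below is a minimal numpy sketch of the exponential-kernel case. The real
# `gramSquaredExp_np` is not shown in this listing, so treat this as an
# assumption about its behavior, not its actual implementation.
import numpy as np

def gram_squared_exp(features, sigma):
    # features: (N, C) activations (N = H*W spatial positions, C = channels)
    sqr = np.sum(features * features, axis=0)            # (C,) squared column norms
    d2 = sqr[:, None] + sqr[None, :] - 2.0 * features.T.dot(features)
    d2 = np.maximum(d2, 0.0)                             # clip tiny negative round-off
    return np.exp(-d2 / (2.0 * sigma ** 2))              # (C, C) kernelized "Gram" matrix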
def stylize(content, styles, network_path='imagenet-vgg-verydeep-19.mat', iterations=1000,
        content_weight=5e0, content_weight_blend=1, style_weight=5e2, style_layer_weight_exp=1, style_blend_weights=None, tv_weight=1e2,
        learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, pooling='avg',
        print_iterations=100, checkpoint_iterations=100, checkpoint_path=None, output_path=None):

    """
    This is a function to stylelize images,
    given the content image, list of style images, path to the network and all the hypter parameters.

    Returns
    -------
    stylized_img : np.ndarray
        N x H x W x C image.
    """
    # calculate the shape of the network input tensor according to the content image
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network_path)

    # scale the importance of each style layer according to its depth
    # (deeper layers are more important if style_layer_weight_exp > 1; default = 1)
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features of the content image by feeding it into the network
    # TODO: why put the graph on the CPU? What is the high-level idea of content_features?
    g = tf.Graph()
    with g.as_default(), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features of each style image by feeding it into the network
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram


    # make stylized image using backpropagation
    # if the user doesn't specify an input image, start with noise
    # TODO: where does the number 0.256 come from?
    with tf.Graph().as_default():

        initial = tf.random_normal(shape) * 0.256

        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss; we can adjust the weight of each layer in CONTENT_LAYERS
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # We can specify a different weight for each style image
        if style_blend_weights is None:
            # default is equal weights
            style_blend_weights = [1.0/len(styles) for _ in styles]
        else:
            total_blend_weight = sum(style_blend_weights)
            # normalization
            style_blend_weights = [weight/total_blend_weight
                                   for weight in style_blend_weights]
        # style loss
        style_loss = 0
        # iterate to calculate the style loss over multiple style images
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)


        # total variation denoising, according to the paper
        # Mahendran, Aravindh, and Andrea Vedaldi. "Understanding deep image representations by inverting them."
        # Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    output_file = None
                    if not last_step:
                        if checkpoint_path:
                            output_file = checkpoint_path % i
                    else:
                        output_file = output_path

                    if output_file:
                        # `imsave` is assumed to be imported at module level
                        # (e.g. scipy.misc.imsave or imageio.imwrite)
                        imsave(output_file, img_out)
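
# A hedged usage sketch for the file-writing `stylize` wrapper above; the file
# names are illustrative, and `checkpoint_path` needs a %-style placeholder
# because of the `checkpoint_path % i` call.
import numpy as np
from PIL import Image

content = np.asarray(Image.open('content.jpg').convert('RGB'), dtype=np.float32)
style = np.asarray(Image.open('style.jpg').convert('RGB'), dtype=np.float32)

stylize(content, [style],
        iterations=500,
        checkpoint_path='checkpoints/iter_%04d.jpg',
        output_path='out/stylized.jpg')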
def stylize_c(network,
              initial,
              initial_noiseblend,
              content,
              styles,
              preserve_colors,
              iterations,
              content_weight,
              content_weight_blend,
              style_weight,
              style_layer_weight_exp,
              style_blend_weights,
              tv_weight,
              learning_rate,
              beta1,
              beta2,
              epsilon,
              pooling,
              prev_style_image,
              prev_content_image,
              print_iterations=None,
              checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            #noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            noise = content
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        #content_layers_weights['conv2_2'] = content_weight_blend
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # Continuity loss
        #K = np.array([[1/256,4/256,6/256,4/256,1/256],
        #[4/256,16/256,24/256,16/256,4/256],
        #[6/256,24/256,36/256,24/256,6/256],
        #[4/256,16/256,24/256,16/256,4/256],
        #[1/256,4/256,6/256,4/256,1/256]], dtype=np.float32)
        #G_filt = np.zeros([5,5,3],dtype=np.float32)
        #G_filt[:,:,0] = K
        #G_filt[:,:,1] = K
        #G_filt[:,:,2] = K
        #filterG = tf.convert_to_tensor(G_filt,dtype=tf.float32)
        #filterG = tf.reshape(filterG, [5,5,3,1])
        #G_filt = tf.reshape(K, [5,5,1,1], name='G_filt')
        #G_filt = tf.convert_to_tensor(K, dtype=tf.float32)
        #tf.expand_dims(G_filt,0)
        #tf.expand_dims(G_filt,0)

        tf_org_img = tf.convert_to_tensor(content, dtype=tf.float32)
        tf_org_img = tf.reshape(tf_org_img, tf.shape(image))
        tf_prev_img = tf.convert_to_tensor(prev_content_image,
                                           dtype=tf.float32)
        tf_prev_img = tf.reshape(tf_prev_img, tf.shape(image))
        tf_prev_styl = tf.convert_to_tensor(prev_style_image, dtype=tf.float32)
        tf_prev_styl = tf.reshape(tf_prev_styl, tf.shape(image))

        #smth_org_frame_diff = tf.nn.conv2d(tf_org_img - tf_prev_img,filterG,strides=[1, 1, 1, 1],padding='VALID')
        #smth_styl_frame_diff = tf.nn.conv2d(image - tf_prev_styl,filterG,strides=[1, 1, 1, 1],padding='VALID')
        #org_frame_diff = tf.norm(smth_org_frame_diff)
        #styl_frame_diff = tf.norm(smth_styl_frame_diff)

        org_frame_diff = tf.norm(tf_org_img - tf_prev_img)
        styl_frame_diff = tf.norm(tf_prev_styl - image)

        hyperparam_cl = 10e4  # i.e. 1e5
        cl_loss = tf.multiply(
            hyperparam_cl,
            tf.divide(styl_frame_diff, org_frame_diff +
                      3 * content.shape[0] * content.shape[1]))

        # overall loss
        loss = content_loss + style_loss + cl_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('     content loss: %g\n' % content_loss.eval())
            stderr.write('       style loss: %g\n' % style_loss.eval())
            stderr.write('          tv loss: %g\n' % tv_loss.eval())
            stderr.write('  Continuity loss: %g\n' % cl_loss.eval())
            stderr.write('       total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(
                        iteration_times[-10:]) * (iterations - i)
                    stderr.write(
                        'Iteration %4d/%4d (%s elapsed, %s remaining)\n' %
                        (i + 1, iterations, hms(elapsed), hms(remaining)))

                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
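
# A numpy restatement of the continuity loss used above, for clarity only
# (this helper is not part of the original listing). It penalizes
# frame-to-frame change in the stylized output relative to change in the
# source frames; the 3*H*W term keeps the ratio bounded when the source
# frames barely move.
import numpy as np

def continuity_loss(styled, prev_styled, content, prev_content, weight=10e4):
    styl_frame_diff = np.linalg.norm(styled - prev_styled)   # tf.norm analogue
    org_frame_diff = np.linalg.norm(content - prev_content)
    h, w = content.shape[:2]
    return weight * styl_frame_diff / (org_frame_diff + 3 * h * w)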
def sparsizeALL(network,
                img,
                regularisation_coeff,
                iterations,
                learning_rate,
                beta1,
                beta2,
                epsilon,
                pooling,
                print_iterations=None,
                checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """

    shape = (1, ) + img.shape
    img_conteneur = np.zeros(shape, dtype='float32')
    img_conteneur[0, :, :, :] = img
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)
    loss_curve = []
    sparsness_curve = []

    # make sparse encoded image using backpropagation
    with tf.Graph().as_default():

        init_image = tf.constant(img_conteneur)
        net_init = vgg.net_preloaded(vgg_weights, init_image, pooling)

        layer_nonzero_init = []
        layer_size = []

        for layer in RELU_LAYERS:
            init_encoding = net_init[layer]
            layer_nonzero_init.append(tf.count_nonzero(init_encoding))
            layer_size.append(tf.size(init_encoding))

        nonzero_init = tf.reduce_sum(layer_nonzero_init)
        size = tf.reduce_sum(layer_size)
        init_sparsness = 1. - tf.to_float(nonzero_init) / tf.to_float(size)

        image = tf.Variable(tf.random_normal(shape) * 0.256)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        layer_l1_norm = []
        layer_nonzero = []
        layer_size = []

        for layer in RELU_LAYERS:
            encoding = net[layer]
            layer_l1_norm.append(tf.reduce_sum(encoding))
            layer_nonzero.append(tf.count_nonzero(encoding))
            layer_size.append(tf.size(encoding))

        reg_term = tf.reduce_sum(layer_l1_norm)
        nonzero = tf.reduce_sum(layer_nonzero)
        # both networks share layer sizes, so `size` from the init pass is reused
        sparsness = 1. - tf.to_float(nonzero) / tf.to_float(size)

        lbda = tf.constant(regularisation_coeff)
        loss = tf.nn.l2_loss(image - init_image) + lbda * reg_term

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write(' loss: %g\n' % loss.eval())
            stderr.write(' initial sparseness: %g\n' % init_sparsness.eval())
            stderr.write(' final sparseness: %g\n' % sparsness.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(
                        iteration_times[-10:]) * (iterations - i)
                    stderr.write(
                        'Iteration %4d/%4d (%s elapsed, %s remaining)\n' %
                        (i + 1, iterations, hms(elapsed), hms(remaining)))
                    loss_curve.append(sess.run(loss))
                    sparsness_curve.append(sess.run(sparsness))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = best.reshape(shape[1:])

                    yield ((None if last_step else i), img_out)

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)

    fig1 = plt.figure('loss')
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Loss over iterations')
    plt.step(np.arange(1, iterations), loss_curve, 'r')

    fig2 = plt.figure('sparsness')
    plt.xlabel('Iterations')
    plt.title("Sparseness of the new image's encoding")
    plt.ylabel("Percentage of zeros in the new image's encoding")
    plt.step(np.arange(1, iterations), sparsness_curve, 'b')

    # fig1.savefig('body1_transformation/' + layer + '_' + str(regularisation_coeff) + '_loss_plot.png')
    # fig2.savefig('body1_transformation/' + layer + '_' + str(regularisation_coeff) + '_sparsness_plot.png')

    plt.show()
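
# A hedged usage sketch for `sparsizeALL`: it is a generator, so it must be
# iterated for the optimization (and the final plots) to run. File names and
# hyperparameters here are illustrative.
import numpy as np
from PIL import Image

img = np.asarray(Image.open('body1.jpg').convert('RGB'), dtype=np.float32)
for step, img_out in sparsizeALL('imagenet-vgg-verydeep-19.mat', img,
                                 regularisation_coeff=1e-4, iterations=200,
                                 learning_rate=1e1, beta1=0.9, beta2=0.999,
                                 epsilon=1e-8, pooling='max',
                                 checkpoint_iterations=50):
    if step is None:  # final image
        out = np.clip(img_out, 0, 255).astype(np.uint8)
        Image.fromarray(out).save('sparse_encoding.jpg')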
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            content_mask,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    mask_shape = (1, ) + content.shape[0:2] + (1, )
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]
    content_mask_features = {}
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        mask = tf.placeholder('float', shape=mask_shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        net_mask = vgg.net_downsample(vgg_weights, mask)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})
        for layer in CONTENT_LAYERS + STYLE_LAYERS:
            # axis=3 matches mask_shape (1, H, W, 1); axis=4 would be out of
            # range for a 3-D array
            mask_pre = np.expand_dims(np.expand_dims(content_mask, axis=0), axis=3)
            content_mask_features[layer] = net_mask[layer].eval(
                feed_dict={mask: mask_pre})
            # plt.imshow(np.squeeze(content_mask_features[layer]))
            # plt.show()
            # plt.pause(0.01)

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                # `kernel_s` (the patch size) is assumed to be a module-level constant
                features_bank = sk_image.extract_patches_2d(
                    np.squeeze(features), (kernel_s, kernel_s))
                style_features[i][layer] = [features_bank, features]

    # plt.imshow(np.squeeze(initial).astype(np.uint8))
    # plt.show()
    # plt.pause(0.01)

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            # noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')


            # initial_content_noise_coeff = 1.0 - initial_noiseblend
            # noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            # initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)

        # plt.imshow(np.squeeze(initial))
        # plt.show()
        # plt.pause(0.01)

        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        # content loss
        content_layers_weights = {}
        for layer in CONTENT_LAYERS:
            content_layers_weights[layer] = 1. / len(CONTENT_LAYERS)
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            map_ = (net[content_layer] - content_features[content_layer])
            #            map_ = (net[content_layer] - content_features[content_layer])*(1.-content_mask_features[content_layer])
            loss_ = content_layers_weights[content_layer] * content_weight * (
                2 * tf.nn.l2_loss(map_) / content_features[content_layer].size)
            content_losses.append(loss_)
        content_loss += reduce(tf.add, content_losses)
        # plt.imshow(1 - np.squeeze(content_mask_features[content_layer]))
        # plt.show()
        # plt.pause(100)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                # Calculate normalized layer
                layer = tf.expand_dims(net[style_layer], axis=4)
                # `pad` (half the patch size) is assumed to be a module-level constant
                paddings = [[0, 0], [pad, pad], [pad, pad], [0, 0], [0, 0]]
                layer_depth = layer.get_shape().as_list()[3]
                layer_pad = tf.pad(layer, paddings, "CONSTANT")
                layer_norm = tf.sqrt(
                    tf.nn.conv3d(tf.pow(layer_pad, 2),
                                 tf.ones(
                                     (kernel_s, kernel_s, layer_depth, 1, 1),
                                     dtype=tf.float32),
                                 strides=[1, 1, 1, 1, 1],
                                 padding='VALID'))
                # Calculate normalized filter bank
                style_filters = np.transpose(style_features[i][style_layer][0],
                                             (1, 2, 3, 0))
                style_filters = np.expand_dims(style_filters, axis=3)
                style_filters_norm = np.sqrt(
                    np.sum(np.power(style_filters, 2), axis=(0, 1, 2)))
                style_filters_normalized = style_filters / style_filters_norm
                # Calculate normalized correlations
                layer_filtered = tf.nn.conv3d(layer_pad,
                                              style_filters_normalized,
                                              strides=[1, 1, 1, 1, 1],
                                              padding='VALID') / layer_norm
                # Find maximum response and index into the filters
                max_filter_response_idx = tf.squeeze(
                    tf.argmax(layer_filtered, axis=4))
                max_filter_response_idx = tf.reshape(max_filter_response_idx,
                                                     [-1])
                max_filter_response_weight = tf.squeeze(
                    tf.reduce_max(tf.abs(layer_filtered), axis=4))
                max_filter_response_weight = tf.reshape(
                    max_filter_response_weight, [-1])
                max_filter_response_weight = max_filter_response_weight / tf.reduce_max(
                    max_filter_response_weight)
                style_filters_tf = tf.transpose(
                    tf.squeeze(tf.convert_to_tensor(style_filters,
                                                    np.float32)), (3, 0, 1, 2))
                style_filters_tf_gathered = tf.gather(style_filters_tf,
                                                      max_filter_response_idx)
                style_filters_tf_gathered = tf.reshape(
                    style_filters_tf_gathered,
                    (style_filters_tf_gathered.get_shape().as_list()[0], -1))
                layer_patches = tf.extract_image_patches(
                    tf.squeeze(layer_pad, axis=4), [1, kernel_s, kernel_s, 1],
                    [1, 1, 1, 1], [1, 1, 1, 1],
                    padding="VALID")
                layer_size = tf.shape(layer_patches)
                layer_patches = tf.reshape(layer_patches, (-1, layer_size[3]))
                style_weights = np.reshape(content_mask_features[style_layer],
                                           (-1))
                #                loss_ = tf.reduce_mean(tf.reduce_mean(tf.pow(layer_patches-style_filters_tf_gathered, 2),axis=1)*tf.stop_gradient(max_filter_response_weight))
                loss_ = tf.reduce_mean(
                    tf.reduce_mean(tf.pow(
                        layer_patches - style_filters_tf_gathered, 2),
                                   axis=1) * style_weights)
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    loss_)

            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                #                print(str(max_filter_response_weight.eval()))
                #                print(' ')
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
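
# A small numpy check of the patch bank built above with
# `sklearn.feature_extraction.image.extract_patches_2d`, assuming kernel_s = 3:
# an (H, W, C) activation map yields (H-2)*(W-2) patches of shape (3, 3, C),
# which become the conv3d filter bank at the top of the style loss.
import numpy as np
from sklearn.feature_extraction import image as sk_image

features = np.random.rand(16, 16, 64).astype(np.float32)  # one squeezed style layer
bank = sk_image.extract_patches_2d(features, (3, 3))
print(bank.shape)  # (196, 3, 3, 64) == ((16-2) * (16-2), 3, 3, 64)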
Example #9
    def stylize(self, network, content, styles, iterations, content_weight,
                content_weight_blend, style_weight, style_layer_weight_exp,
                style_blend_weights, tv_weight, learning_rate, beta1, beta2,
                epsilon, pooling):
        """
        Nałożenie stylu na obraz
        Metoda jest wywoływana iteracyjnie, obliczane są straty i wagi, a potem do rodzica jest przekazywany
        tuple z iteratorem i tablicą obrazu oraz, jeśli to ostatnia iteracja, z obliczonymi stratami

        :rtype: iterator[tuple[int,image]]
        """
        self.style_features = [{} for _ in styles]
        self.content_features = {}
        self.style_shapes = [(1, ) + style.shape for style in styles]
        self.shape = (1, ) + content.shape
        self.vgg_weights, vgg_mean_pixel = vgg.load_net(network)
        self.layer_weight = 1.0
        for style_layer in self.style_layers:
            self.style_layers_weights[style_layer] = self.layer_weight
            self.layer_weight *= style_layer_weight_exp

        self.calculate_sum_weight()
        self.calculate_content_feature(pooling, content, vgg_mean_pixel)
        self.calculate_style_feature(styles, pooling, vgg_mean_pixel)

        # make the stylized image using backpropagation
        with tf.Graph().as_default():
            initial = tf.random_normal(self.shape) * 0.256
            self.image = tf.Variable(initial)
            self.net = vgg.net_preloaded(self.vgg_weights, self.image, pooling)

            self.calculate_content_loss(content_weight_blend, content_weight)
            self.calculate_style_loss(styles, style_weight,
                                      style_blend_weights)
            self.denoise_image(tv_weight)
            self.calculate_total_loss()

            # optimizer setup
            train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                                epsilon).minimize(self.loss)

            # optimization
            best_loss = float('inf')
            best = None
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                for i in range(iterations):
                    print('%4d/%4d' % (i + 1, iterations))
                    train_step.run()

                    last_step = (i == iterations - 1)
                    if last_step:
                        loss_vals = self.get_loss_vals(self.loss_store)
                    else:
                        loss_vals = None

                    if last_step:
                        this_loss = self.loss.eval()
                        if this_loss < best_loss:
                            best_loss = this_loss
                            best = self.image.eval()

                        img_out = vgg.unprocess(best.reshape(self.shape[1:]),
                                                vgg_mean_pixel)
                    else:
                        img_out = None

                    yield i + 1 if last_step else i, img_out, loss_vals
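
This variant yields a (step, image, losses) triple on every iteration, with the image and losses populated only on the final step. A hedged usage sketch; `stylizer` is assumed to be an instance of the class above, network/content/styles are assumed to be already loaded, and the hyperparameter values are purely illustrative:

final_image, final_losses = None, None
for step, img, loss_vals in stylizer.stylize(
        network, content, styles, iterations=500,
        content_weight=5.0, content_weight_blend=1.0, style_weight=500.0,
        style_layer_weight_exp=1.0, style_blend_weights=[1.0], tv_weight=100.0,
        learning_rate=10.0, beta1=0.9, beta2=0.999, epsilon=1e-8, pooling='max'):
    if img is not None:  # only the final step carries the image and the losses
        final_image, final_losses = img, loss_vals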
Exemple #10
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape   # e.g. content.shape=(356, 600, 3) gives shape=(1, 356, 600, 3)
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}                 # content feature maps
    style_features = [{} for _ in styles]     # style feature maps

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)  # load the pretrained model to get the weights and mean pixel

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp     # if style_layer_weight_exp is set, the layer weights grow
                                                   # exponentially; its default of 1 means no growth

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum   # rescale the weights so they sum to 1

    # First create a placeholder for the image, then feed content_pre to it through
    # eval()'s feed_dict to run the network and obtain the content feature maps.
    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.compat.v1.Session() as sess:   # compute content features
        image = tf.compat.v1.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)        # the whole network is built here; net holds the content feature maps
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])  #content - vgg_mean_pixel
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre}) # read out the content features
            # print(layer,content_features[layer].shape)

    # compute style features in feedforward mode
    for i in range(len(styles)):                     # compute style features
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.compat.v1.Session() as sess:
            image = tf.compat.v1.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)       # pooling defaults to max
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])  # styles[i] - vgg_mean_pixel
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))   # reshape by channel count
                gram = np.matmul(features.T, features) / features.size  # Gram matrix
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make the stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256            # initialize the image with noise
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + ( tf.random.normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        '''
        image = tf.Variable(initial) creates the TensorFlow variable we actually train.
        Note that the object being optimized here is an image, not weights and biases.
        '''
        net = vgg.net_preloaded(vgg_weights, image, pooling)   # net now holds the feature maps of the generated image

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend      # content weight blend: conv4_2 * blend + conv5_2 * (1 - blend)
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend  # the blend defaults to 1, i.e. only the conv4_2 layer is used

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /         # generated image - content image
                    content_features[content_layer].size))     # tf.nn.l2_loss: output = sum(t ** 2) / 2
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        '''
        Since several style images may be supplied, a for loop is used. As before, style_pre
        is fed to the image placeholder to run the network and obtain the style feature maps.
        Because style is the inner product of the responses of different filters, one extra
        step is added: gram = np.matmul(features.T, features) / features.size.
        '''
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size   # Gram matrix of the generated image
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        '''
        Content Loss and Style Loss are defined above, following the formulas in the paper.
        Total variation denoising is added on top, so the total loss is
        content_loss + style_loss + tv_loss.
        '''
        loss = content_loss + style_loss + tv_loss     # the total loss is the sum of the three terms

        # optimizer setup
        # create train_step with the Adam optimizer to minimize the loss defined above;
        # iterating train_step drives the loss down and produces the best image found
        train_step = tf.compat.v1.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)   # undo the preprocessing

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (             # like return, but for an iterator
                        (None if last_step else i),
                        img_out
                    )
def stylyze(options, callback):

    parser = build_parser()
    if options is None:
        key = 'TF_CPP_MIN_LOG_LEVEL'
        if key not in os.environ:
            os.environ[key] = '2'

        options = parser.parse_args()

    if not os.path.isfile(options.network):
        parser.error("Network %s does not exist. (Did you forget to "
                     "download it?)" % options.network)

    if [options.checkpoint_iterations,
            options.checkpoint_output].count(None) == 1:
        parser.error("use either both of checkpoint_output and "
                     "checkpoint_iterations or neither")

    if options.checkpoint_output is not None:
        if re.match(r'^.*(\{.*\}|%.*).*$', options.checkpoint_output) is None:
            parser.error("To save intermediate images, the checkpoint_output "
                         "parameter must contain placeholders (e.g. "
                         "`foo_{}.jpg` or `foo_%d.jpg`")

    content_image_arr = [imread(i) for i in options.content]
    style_images = [imread(style) for style in options.styles]

    width_arr = options.width
    for i in range(len(content_image_arr)):
        width = width_arr[i]
        content_image = content_image_arr[i]
        if width is not None:
            new_shape = (int(
                math.floor(
                    float(content_image.shape[0]) / content_image.shape[1] *
                    width)), width)
            content_image = scipy.misc.imresize(content_image, new_shape)
            content_image_arr[i] = content_image
        target_shape = content_image.shape
        for j in range(len(style_images)):
            style_scale = STYLE_SCALE
            if options.style_scales is not None:
                style_scale = options.style_scales[j]
            style_images[j] = scipy.misc.imresize(
                style_images[j],
                style_scale * target_shape[1] / style_images[j].shape[1])

    style_blend_weights = options.style_blend_weights
    if style_blend_weights is None:
        # default is equal weights
        style_blend_weights = [1.0 / len(style_images) for _ in style_images]
    else:
        total_blend_weight = sum(style_blend_weights)
        style_blend_weights = [
            weight / total_blend_weight for weight in style_blend_weights
        ]

    initial_arr = content_image_arr

    # try saving a dummy image to the output path to make sure that it's writable
    output_arr = options.output
    for output in output_arr:
        if os.path.isfile(output) and not options.overwrite:
            raise IOError("%s already exists, will not replace it without "
                          "the '--overwrite' flag" % output)
        try:
            imsave(output, np.zeros((500, 500, 3)))
        except Exception:
            raise IOError('%s is not writable or does not have a valid file '
                          'extension for an image file' % output)

    vgg_weights, vgg_mean_pixel = vgg.load_net(options.network)

    style_shapes = [(1, ) + style.shape for style in style_images]
    style_features = [{} for _ in style_images]

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= options.style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute style features in feedforward mode
    for i in range(len(style_images)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, options.pooling)
            style_pre = np.array(
                [vgg.preprocess(style_images[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - options.initial_noiseblend

    for i in range(len(content_image_arr)):
        Data.save_step(Data.get_step() + 1)
        loss_arrs = None
        for iteration, image, loss_vals in stylize(
                initial=initial_arr[i],
                content=content_image_arr[i],
                preserve_colors=options.preserve_colors,
                iterations=options.iterations,
                content_weight=options.content_weight,
                content_weight_blend=options.content_weight_blend,
                tv_weight=options.tv_weight,
                learning_rate=options.learning_rate,
                beta1=options.beta1,
                beta2=options.beta2,
                epsilon=options.epsilon,
                pooling=options.pooling,
                initial_content_noise_coeff=initial_content_noise_coeff,
                style_images=style_images,
                style_layers_weights=style_layers_weights,
                style_weight=options.style_weight,
                style_blend_weights=style_blend_weights,
                vgg_weights=vgg_weights,
                vgg_mean_pixel=vgg_mean_pixel,
                style_features=style_features,
                print_iterations=options.print_iterations,
                checkpoint_iterations=options.checkpoint_iterations,
                callback=callback):
            if (image is not None) and (options.checkpoint_output is not None):
                imsave(fmt_imsave(options.checkpoint_output, iteration), image)
            if (loss_vals is not None) \
                    and (options.progress_plot or options.progress_write):
                if loss_arrs is None:
                    itr = []
                    loss_arrs = OrderedDict(
                        (key, []) for key in loss_vals.keys())
                for key, val in loss_vals.items():
                    loss_arrs[key].append(val)
                itr.append(iteration)

        imsave(options.output[i], image)

        if options.progress_write:
            fn = "{}/progress.txt".format(os.path.dirname(options.output[i]))
            tmp = np.empty((len(itr), len(loss_arrs) + 1), dtype=float)
            tmp[:, 0] = np.array(itr)
            for ii, val in enumerate(loss_arrs.values()):
                tmp[:, ii + 1] = np.array(val)
            np.savetxt(fn,
                       tmp,
                       header=' '.join(['itr'] + list(loss_arrs.keys())))

        if options.progress_plot:
            import matplotlib
            matplotlib.use('Agg')
            from matplotlib import pyplot as plt
            fig, ax = plt.subplots()
            for key, val in loss_arrs.items():
                ax.semilogy(itr, val, label=key)
            ax.legend()
            ax.set_xlabel("iterations")
            ax.set_ylabel("loss")
            fig.savefig("{}/progress.png".format(
                os.path.dirname(options.output[i])))
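
Every example in this collection reduces a layer's activations to the same statistic: a normalized Gram matrix. A minimal NumPy sketch of that step, matching the comments above (illustrative, not part of any listing):

import numpy as np

def gram_matrix(features):
    # features: activations of one VGG layer, shape (1, height, width, channels)
    flat = features.reshape(-1, features.shape[-1])  # (height * width, channels)
    return np.matmul(flat.T, flat) / features.size   # (channels, channels)

# e.g. with a random stand-in for a relu-layer activation map:
gram = gram_matrix(np.random.rand(1, 32, 32, 64).astype(np.float32))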
Exemple #12
0
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = '0'
    with g.as_default(), g.device('/cpu:0'), tf.Session(config=config) as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session(
                config=config) as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make the stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            '''
            Compute the content loss
            
            Variables:
            content_weight: scalar constant we multiply the content_loss by.
            net[content_layer]: features of the current image, Tensor with shape [1, height, width, channels]
            content_features[content_layer]: features of the content image, Tensor with shape [1, height, width, channels]
            '''

            # features of the current image [1, height, width, channels]
            l_content = content_weight * tf.reduce_sum(
                (net[content_layer] - content_features[content_layer])**2)

            content_losses.append(content_layers_weights[content_layer] *
                                  l_content)
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, channels = map(lambda i: i.value,
                                                 layer.get_shape())
                size = height * width * channels
                '''
                Compute the Gram matrix of the layer

                Variables:
                layer: features of the current image at style_layer, Tensor with shape [1, height, width, channels]
                gram: computed gram matrix with shape [channels, channels]
                '''

                feats = tf.reshape(layer, (-1, channels))
                gram = tf.matmul(tf.transpose(feats), feats)
                gram /= size
                '''
                Compute the style loss

                Variables:
                style_layers_weights[style_layer]: scalar constant we multiply the style loss by.
                gram: computed gram matrix with shape [channels, channels]
                style_gram: computed gram matrix of the style image at style_layer with shape [channels, channels]
                '''
                style_gram = style_features[i][style_layer]
                l_style = style_layers_weights[style_layer] * tf.reduce_sum(
                    (gram - style_gram)**2)

                style_losses.append(l_style)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        '''
        Compute the TV loss

        Variables:
        tv_weight: scalar giving the weight to use for the TV loss.
        image: tensor of shape (1, H, W, 3) holding the current image.
        '''
        tv_loss = tv_weight * (tf.reduce_sum(
            (image[:, 1:, :, :] - image[:, :-1, :, :])**2) + tf.reduce_sum(
                (image[:, :, 1:, :] - image[:, :, :-1, :])**2))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
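
Exemple #12 writes its losses with tf.reduce_sum rather than tf.nn.l2_loss; the two are interchangeable here because 2 * tf.nn.l2_loss(t) equals tf.reduce_sum(t ** 2). Its total variation term is the sum of squared differences between adjacent pixels (the variant in Exemple #10 additionally normalizes by the tensor sizes). A NumPy sketch of the TV term, assuming a float image of shape (1, H, W, 3):

import numpy as np

def tv_loss(image, tv_weight):
    dy = image[:, 1:, :, :] - image[:, :-1, :, :]  # vertical neighbor differences
    dx = image[:, :, 1:, :] - image[:, :, :-1, :]  # horizontal neighbor differences
    return tv_weight * (np.sum(dy ** 2) + np.sum(dx ** 2))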
Exemple #13
0
def main(_):

    global_step = tf.Variable(0, trainable=False, name='global_step')
    invert_layer = FLAGS.invert_layer

    ### Load pre-trained VGG weights
    vgg_mat_file = FLAGS.vgg19
    print("pretrained-VGG : {}".format(FLAGS.vgg19))
    vgg_weights, vgg_mean_pixel = vgg.load_net(vgg_mat_file)
    print("vgg_mean_pixel : ", vgg_mean_pixel)

    ### Read input image
    image = FLAGS.image
    print("input image : {}".format(FLAGS.image))
    img = read_image(image, 224, 224)
    scipy.misc.imsave(sample_dir + '/input_image.png', img)

    img = img - vgg_mean_pixel
    img = img.astype(np.float32)
    img = np.expand_dims(img, axis=0)  # extend shape for VGG input
    img_shape = np.shape(img)
    print("Image shape : ", np.shape(img))

    gpu_options = tf.GPUOptions(allow_growth=True)
    ### Compute the content features of 'invert_layer'
    X_content_feature = {}
    content_graph = tf.Graph()
    with content_graph.as_default():
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        X_content = tf.placeholder('float32', shape=img_shape)
        network = vgg.net_preloaded(vgg_weights, X_content, pooling)  # build the graph on the placeholder, not on the constant
        X_content_feature = sess.run(network[invert_layer],
                                     feed_dict={X_content: img})

    ### Define network to learn 'X'
    # X_sigma = tf.norm(vgg_mean_pixel)*img_shape[1]   # roughly...
    # X_sigma = tf.cast(X_sigma, tf.float32)
    # X = tf.Variable(tf.random_normal(img_shape))*X_sigma
    X = tf.Variable(tf.random_normal(img_shape))
    invert_net = vgg.net_preloaded(vgg_weights, X, pooling)
    X_invert_feature = invert_net[invert_layer]

    l2_loss = tf.norm(X_content_feature - X_invert_feature,
                      'euclidean') / tf.norm(X_content_feature, 'euclidean')
    #total_variation_loss = tf.image.total_variation(img+X)[0]
    total_variation_loss = tf.reduce_sum(
        tf.image.total_variation(tf.convert_to_tensor(img + X)))
    sigma_tv = 5e-7
    loss = l2_loss + sigma_tv * total_variation_loss

    train_step = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.5).minimize(
        loss, global_step=global_step)

    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())

    for step in range(max_tries):

        _, _loss = sess.run([train_step, loss])
        print("step: %06d" % step, "loss: {:.04}".format(_loss))
        #_tv = sess.run(total_variation_loss)
        #print("total_variation_loss : ", sigma_tv*_tv)

        # testing
        if not (step + 1) % 100:
            this_X = sess.run(X)
            this_X = this_X + vgg_mean_pixel
            scipy.misc.imsave(
                sample_dir + '/invert_{}'.format(str(step + 1).zfill(6)) +
                '.png', this_X[0])
def train(content_targets,
          style_target,
          content_weight,
          style_weight,
          tv_weight,
          vgg_path,
          epochs=2,
          print_iterations=1000,
          batch_size=4,
          learning_rate=1e-3,
          save_path='model/style.ckpt'):
    # drop the trailing training images that do not fill a whole batch
    mod = len(content_targets) % batch_size
    if mod > 0:
        content_targets = content_targets[:-mod]

    style_features = {}
    # training images are 320x320x3, in TensorFlow (NHWC) layout
    batch_shape = (batch_size, 320, 320, 3)
    style_shape = (1, ) + style_target.shape

    # load the pretrained VGGNet model
    weights, mean_pixel = vgg.load_net(vgg_path)

    with tf.Graph().as_default(), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        # yes, really: the placeholder image minus the mean pixel
        style_image_pre = vgg.preprocess(style_image, mean_pixel)
        net = vgg.net(weights, style_image_pre)
        # wrap the style image in a batch array
        style_pre = np.array([style_target])
        for layer in STYLE_LAYER:
            # fetch this layer's activations
            features = net[layer].eval(feed_dict={style_image: style_pre})
            # the row count is the number of filters in this layer (see the paper)
            features = np.reshape(features, (-1, features.shape[3]))
            # Gram Matrix: A'A (see the paper)
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        x_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name='x_content')
        x_pre = vgg.preprocess(x_content, mean_pixel)

        content_features = {}
        content_net = vgg.net(weights, x_pre)
        # as above, extract the required layer
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        # run the residual network
        preds = residual.net(x_content / 255.0)
        preds_pre = vgg.preprocess(preds, mean_pixel)
        net = vgg.net(weights, preds_pre)
        # account for every example in the batch
        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        # difference between passing through the residual network and not
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        # difference between the residual-network output and the style image
        style_losses = []
        for style_layer in STYLE_LAYER:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            # Gram Matrix: A'A (see the paper)
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)
        style_loss = style_weight * functools.reduce(tf.add,
                                                     style_losses) / batch_size

        # remove image noise: total variation
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        # the final loss function
        loss = content_loss + style_loss + tv_loss

        # start the training process
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            start_time = time.time()
            # each epoch runs through every image in the training set once
            while iterations * batch_size < num_examples:
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = helper.read_img(img_p, (320, 320, 3)).astype(
                        np.float32)

                iterations += 1
                # make sure each batch has the expected size
                assert X_batch.shape[0] == batch_size
                feed_dict = {x_content: X_batch}

                # train on this batch
                train_step.run(feed_dict=feed_dict)

                # print training progress every print_iterations steps
                is_print_iter = int(iterations) % print_iterations == 0
                # is this the final epoch and the final batch?
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples

                # print the info
                should_print = is_print_iter or is_last
                if should_print:
                    current_time = time.time()
                    delta_time = current_time - start_time
                    start_time = current_time

                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {x_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup

                    losses = (_style_loss, _content_loss, _tv_loss, _loss)

                    saver = tf.train.Saver()
                    res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch, delta_time)
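
The training loop above walks the set one full batch per step, having dropped the remainder up front. A hedged sketch of that batching arithmetic in isolation; iter_batches is a hypothetical helper, not part of the listing:

def iter_batches(content_targets, batch_size):
    # Drop trailing items that do not fill a batch, then yield full slices.
    usable = len(content_targets) - (len(content_targets) % batch_size)
    for start in range(0, usable, batch_size):
        yield content_targets[start:start + batch_size]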
Exemple #15
0
#TODO: read images from h5 file
#h5f=h5py.File('./data/image.h5','r')
#image=h5f['image']
image_list = []
root = "E:\\image_cluster\\continue\\days_2_resize\\"
for file in os.listdir(root):
    filename = os.path.join(root, file)
    im = cv2.imread(filename, 0)
    image_list.append(im)

#immatrix = np.array(image_list)

#TODO: load VGG and extract feature

VGG_PATH = 'imagenet-vgg-verydeep-19.mat'
vgg_weights, vgg_mean_pixel = vgg.load_net(VGG_PATH)
CONTENT_LAYERS = ('relu3_1', 'relu3_2', 'relu4_1', 'relu5_1', 'relu5_2')
layer = 'relu5_2'
input_image = cv2.imread(
    "E:\\image_cluster\\continue\\days_2_resize\\2_300001_2016-02-02_2016-02-03_1.png"
)
feature_data = []
shape = (1, ) + input_image.shape
g = tf.Graph()
with g.as_default(), tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    net = vgg.net_preloaded(vgg_weights, image, 'avg')
    for name in os.listdir(root):
        filename = os.path.join(root, name)
        input_image = cv2.imread(filename)
        #    content_features = {}
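        # The listing is cut off here. A hedged guess at how the per-image feature
        # extraction is typically finished (the flattening step is an assumption,
        # not recovered from the source):
        image_pre = np.array([vgg.preprocess(input_image, vgg_mean_pixel)])
        features = net[layer].eval(feed_dict={image: image_pre})
        feature_data.append(features.reshape(-1))  # one flat vector per image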
Exemple #16
0
def stylize(network,
            semantic_transfer,
            initial,
            content,
            style,
            mask,
            sem_style_images,
            gradient_capping,
            capped_objs,
            auto_tuning,
            erosion,
            preserve_colors,
            iterations,
            content_weight,
            style_weight,
            tv_weight,
            learning_rate,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """

    t = time.time()

    # Load network
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    # Dictionaries of feature maps for each considered layer
    content_features = {}
    if semantic_transfer:
        style_semantic_features = [{} for _ in sem_style_images]
        guidance_maps = [{} for _ in mask]
        ratio = []  # Auto tuning
        net_gradient = []  # For Gradient Capping
    else:
        style_features = {}

    # Batch
    shape = (1, ) + content.shape

    # To visualize the loss curves
    if SAVE_ITERATIONS:
        loss_sheet = []

    style_layers_weights = ops.compute_style_layers_weight(
        weight_scheme, STYLE_LAYERS, STYLE_LAYER_WEIGHT_EXP)

    # Content features of content image
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, POOLING)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])

        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # Style features of style images
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, POOLING)

        # Guided Gram Matrices (Semantic style transfer)
        if semantic_transfer:
            # Downsample guidance channels
            ops.down_sample_guidance_channels(mask, auto_tuning, erosion, net,
                                              STYLE_LAYERS, guidance_maps,
                                              ratio)
            for idx, img in enumerate(sem_style_images):
                style_pre = np.array([vgg.preprocess(img, vgg_mean_pixel)])

                for layer in STYLE_LAYERS:
                    features = net[layer].eval(feed_dict={image: style_pre})
                    features = features * guidance_maps[idx][layer]
                    features = np.reshape(features, (-1, features.shape[3]))
                    features = features - 1
                    gram = np.matmul(features.T, features)
                    style_semantic_features[idx][layer] = gram

        # Gram Matrices (Whole style transfer)
        else:
            style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])

            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                features = features - 1
                gram = np.matmul(features.T, features) / features.size
                style_features[layer] = gram

    # Initial noise
    initial_content_noise_coeff = 1.0 - INITIAL_NOISEBLEND

    # Optimization
    with tf.Graph().as_default():

        # Initialisation
        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            initial = initial * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)

        image = tf.Variable(initial)

        # Content loss
        net = vgg.net_preloaded(vgg_weights, image, POOLING)
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = CONTENT_WEIGHT_BLEND
        content_layers_weights['relu5_2'] = 1.0 - CONTENT_WEIGHT_BLEND
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        style_loss = 0
        style_losses = []

        # Semantic Style Loss
        if semantic_transfer:

            for i in range(len(sem_style_images)):

                segmented_obj = guidance_maps[i]

                if gradient_capping:
                    if capped_objs[i] == 1:
                        mask_tmp = np.expand_dims(mask[i], axis=0)
                        mask_tmp = np.expand_dims(mask_tmp, axis=3)
                        image_tmp = image * tf.stop_gradient(
                            tf.convert_to_tensor(mask_tmp, dtype=tf.float32))
                        net_gradient.append(
                            vgg.net_preloaded(vgg_weights, image_tmp, POOLING))
                    else:
                        net_gradient.append(net)

                for idx, style_layer in enumerate(STYLE_LAYERS):
                    if gradient_capping:
                        layer = net_gradient[i][style_layer]
                    else:
                        layer = net[style_layer]

                    _, height, width, number = map(lambda i: i.value,
                                                   layer.get_shape())
                    size = number
                    feats = layer * segmented_obj[style_layer]

                    # Gram of the stylized image
                    feats = tf.reshape(feats, (-1, number))
                    feats = feats - 1
                    gram = tf.matmul(tf.transpose(feats), feats)

                    # Precomputed Gram of the style image
                    style_gram = style_semantic_features[i][style_layer]
                    style_losses.append(style_layers_weights[style_layer] * 2 *
                                        tf.nn.l2_loss(gram - style_gram) /
                                        (2 * size**2))

                style_loss += style_weight * reduce(tf.add,
                                                    style_losses) * ratio[i]

        # Full Style Loss
        else:
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number  # Ml * Nl
                feats = tf.reshape(layer, (-1, number))
                feats = feats - 1
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * reduce(tf.add, style_losses)

        # Regularization Loss
        tv_loss = ops.regularization_loss(image, tv_weight)

        # Total Loss
        loss = content_loss + style_loss + tv_loss

        # Optimizer
        train_step = tf.train.AdamOptimizer(learning_rate, BETA1, BETA2,
                                            EPSILON).minimize(loss)

        # best is the image returned after optimization
        best_loss = float('inf')
        best = None

        # Optimization
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')

            # Print the progress for every print_iterations
            if (print_iterations and print_iterations != 0):
                utils.print_progress(content_loss.eval(), style_loss.eval(),
                                     tv_loss.eval(), loss.eval())

            # Optimize + print loss + return final image
            for i in range(iterations):
                train_step.run()
                last_step = (i == iterations - 1)

                if print_iterations and i % print_iterations == 0:
                    utils.print_progress(content_loss.eval(),
                                         style_loss.eval(), tv_loss.eval(),
                                         loss.eval())

                if SAVE_ITERATIONS and i % SAVE_ITERATIONS == 0:
                    utils.save_progress(i,
                                        time.time() - t, style_loss.eval(),
                                        content_loss.eval(), loss_sheet)

                if last_step:
                    utils.print_progress(content_loss.eval(),
                                         style_loss.eval(), tv_loss.eval(),
                                         loss.eval())
                    if SAVE_ITERATIONS:
                        utils.save_progress(i,
                                            time.time() - t, style_loss.eval(),
                                            content_loss.eval(), loss_sheet)
                        pyexcel.save_as(records=loss_sheet,
                                        dest_file_name="loss.csv")

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:

                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    # Color preservation
                    if preserve_colors:
                        img_out = colors.preserve_colors(content, img_out)

                    yield ((None if last_step else i), img_out)
Exemple #17
0
G_sample = generator(Z)  # Z = random input images to begin with.
D_real, D_logit_real = discriminator(X)  # X = real dataset
D_fake, D_logit_fake = discriminator(
    G_sample)  # G_sample = generated dataset (fake)
# D_real & D_fake = unused   (D_fake = probability G fools D)
""" Feature Loss """
#VGG
#content = imread('abbeyexample_copy.png')/256
#content = gray2rgb(rgb2gray(content))
# shape = (1,256,256,3)
shape = (mb_size, 256, 256, 3)
pooling = 'avg'
CONTENT_LAYERS = ('relu4_2', 'relu5_2')
network = 'imagenet-vgg-verydeep-19.mat'
vgg_weights, vgg_mean_pixel = vgg.load_net(network)
print(5)
orig_image = tf.placeholder(
    'float', shape=shape)  #need to feed it with (1,256,256,3) objects
print(orig_image)
orig_content = vgg.preprocess(orig_image,
                              vgg_mean_pixel)  #tensor (1,256,256,3)
print(orig_content)
print('G_sample.shape', G_sample.shape)
#G_sample = tf.reshape(G_sample,(mb_size,256,256))
#G_sample = tf.stack([G_sample,G_sample,G_sample],axis=3)   #tensor (256,256,3)
#print('G_sample.shape',G_sample.shape)
gen_content = vgg.preprocess(G_sample, vgg_mean_pixel)
# gen_content = tf.expand_dims(gen_content,0)
print('ok')
orig_net = vgg.net_preloaded(vgg_weights, orig_content, pooling)
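# Hedged sketch of the feature loss the snippet appears to be setting up:
# compare real and generated activations at the content layers. gen_net and
# feature_loss are introduced here; they are not in the original snippet.
gen_net = vgg.net_preloaded(vgg_weights, gen_content, pooling)
feature_losses = []
for layer in CONTENT_LAYERS:
    # mean squared difference between real and generated activations
    feature_losses.append(tf.reduce_mean(tf.square(orig_net[layer] - gen_net[layer])))
feature_loss = tf.add_n(feature_losses)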
Exemple #18
0
def inferenceImg(network, initial_img, initial_noiseblend, content, style,
                 preserve_colors, iterations, content_weight,
                 content_weight_blend, style_weight, style_layer_weight_exp,
                 style_blend_weight, tv_weight, learning_rate, beta1, beta2,
                 epsilon, pooling, print_iterations, checkpoint_iterations):

    content_shape = (1, ) + content.shape
    style_shape = (1, ) + style.shape

    content_features = {}
    style_features = {}

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight = layer_weight * style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum = layer_weights_sum + style_layers_weights[
            style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = style_layers_weights[
            style_layer] / layer_weights_sum

    # compute content features in feedforward mode
    g1 = tf.Graph()
    with g1.as_default(), g1.device('/cpu:0'), tf.Session() as sess:
        contentImg = tf.placeholder('float', shape=content_shape)
        net = vgg.net_preloaded(vgg_weights, contentImg, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={contentImg: content_pre})

    # compute style features in feedforward mode
    g2 = tf.Graph()
    with g2.as_default(), g2.device('/cpu:0'), tf.Session() as sess:
        styleImg = tf.placeholder('float', shape=style_shape)
        net = vgg.net_preloaded(vgg_weights, styleImg, pooling)
        style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={styleImg: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make the stylized image using backpropagation
    with tf.Graph().as_default():
        noise = np.random.normal(size=content_shape,
                                 scale=np.std(content) * 0.1)
        initial = tf.random_normal(content_shape) * 0.256
        inferenceImg = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, inferenceImg, pooling)

        # compute content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # compute style loss
        style_loss = 0
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            _, height, width, number = map(lambda i: i.value,
                                           layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(style_layers_weights[style_layer] * 2 *
                                tf.nn.l2_loss(gram - style_gram) /
                                style_gram.size)
        style_loss += style_weight * style_blend_weight * reduce(
            tf.add, style_losses)

        # skip compute variation denoise, in order to shorten the running time
        # total variation denoising
        # tv_y_size = _tensor_size(inferenceImg[:, 1:, :, :])
        # tv_x_size = _tensor_size(inferenceImg[:, :, 1:, :])
        # tv_loss = tv_weight * 2 * (
        #         (tf.nn.l2_loss(inferenceImg[:, 1:, :, :] - inferenceImg[:, :content_shape[1] - 1, :, :]) /
        #          tv_y_size) +
        #         (tf.nn.l2_loss(inferenceImg[:, :, 1:, :] - inferenceImg[:, :, :content_shape[2] - 1, :]) /
        #          tv_x_size))

        tv_loss = 0
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer training
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = inferenceImg.eval()

                    img_out = vgg.unprocess(best.reshape(content_shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
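The style term above compares Gram matrices of VGG activations. As a standalone illustration (a minimal NumPy sketch, not taken from the example; the array sizes are made up), this is the same computation on a tiny fake feature map:

import numpy as np

# Fake activations for one layer: batch 1, 4x4 spatial, 3 channels (made-up sizes).
layer = np.random.rand(1, 4, 4, 3).astype(np.float32)
_, height, width, number = layer.shape
size = height * width * number

feats = layer.reshape(-1, number)   # (16, 3): one row per spatial position
gram = feats.T @ feats / size       # (3, 3): channel co-occurrence statistics

# Style loss against a reference Gram matrix, mirroring the TF expression
# 2 * l2_loss(gram - style_gram) / style_gram.size, where l2_loss(x) = sum(x**2) / 2.
style_gram = np.random.rand(3, 3).astype(np.float32)
style_loss = np.sum((gram - style_gram) ** 2) / style_gram.size
print(style_loss)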
Exemple #19
0
def stylize(Ray_render,
            ray_steps,
            reset_opp,
            session,
            network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)
                                ]).astype(np.float32)
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    image = initial - tf.cast(tf.reshape(vgg_mean_pixel,
                                         (1, 1, 1, 3)), tf.float32)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    # content loss
    content_layers_weights = 1 / (1.0 * len(CONTENT_LAYERS))

    content_loss = 0
    content_losses = []
    for content_layer in CONTENT_LAYERS:
        content_losses.append(
            content_layers_weights * content_weight *
            (2 * tf.nn.l2_loss(net[content_layer] -
                               content_features[content_layer]) /
             content_features[content_layer].size))
    content_loss += reduce(tf.add, content_losses)

    # style loss (left at zero in this variant)
    style_loss = 0
    # overall loss
    loss = content_loss + style_loss  #+ tv_loss

    # optimizer setup
    render_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='parameters')
    with tf.variable_scope('OPTIMIZATION', reuse=tf.AUTO_REUSE):
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(
                                                loss, var_list=render_vars)
    session.run(tf.global_variables_initializer())
    Ray_render.trace(session, ray_steps, reset_opp, num_steps=50)

    def print_progress():
        stderr.write('  content loss: %g\n' %
                     content_loss.eval(session=session))
        #        stderr.write('    style loss: %g\n' % style_loss.eval(session=session))
        stderr.write('    total loss: %g\n' % loss.eval(session=session))

    print_progress()


    # optimization
    stderr.write('Optimization started...\n')
    if (print_iterations and print_iterations != 0):
        print_progress()
    for i in range(iterations):
        stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))

        train_step.run(session=session)
        Ray_render.trace(session, ray_steps, reset_opp, num_steps=50)
        last_step = (i == iterations - 1)
        print_progress()

        if (checkpoint_iterations
                and i % checkpoint_iterations == 0) or last_step:
            image_ = image.eval(session=session)
            img_out = vgg.unprocess(image_.reshape(shape[1:]), vgg_mean_pixel)

            if preserve_colors:
                original_image = np.clip(content, 0, 255)
                styled_image = np.clip(img_out, 0, 255)

                # Luminosity transfer steps:
                # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                # 2. Convert stylized grayscale into YUV (YCbCr)
                # 3. Convert original image into YUV (YCbCr)
                # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                # 5. Convert recombined image from YUV back to RGB

                # 1
                styled_grayscale = rgb2gray(styled_image)
                styled_grayscale_rgb = gray2rgb(styled_grayscale)

                # 2
                styled_grayscale_yuv = np.array(
                    Image.fromarray(styled_grayscale_rgb.astype(
                        np.uint8)).convert('YCbCr'))

                # 3
                original_yuv = np.array(
                    Image.fromarray(original_image.astype(
                        np.uint8)).convert('YCbCr'))

                # 4
                w, h, _ = original_image.shape
                combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                combined_yuv[..., 1] = original_yuv[..., 1]
                combined_yuv[..., 2] = original_yuv[..., 2]

                # 5
                img_out = np.array(
                    Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

            yield ((None if last_step else i), img_out)
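Since `stylize` is a generator that yields `(iteration, image)` pairs, with `iteration` set to `None` on the final image, a caller typically drives it in a loop. A hedged consumption sketch (the `stylize_args` dict and the output filenames are placeholders, not from the example):

from PIL import Image
import numpy as np

# stylize_args is a placeholder for the keyword arguments shown above.
for iteration, img in stylize(**stylize_args):
    out = np.clip(img, 0, 255).astype(np.uint8)
    if iteration is None:
        Image.fromarray(out).save('final.jpg')  # last yield: the finished image
    else:
        Image.fromarray(out).save('checkpoint_%d.jpg' % iteration)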
Exemple #20
0
def stylize(content,
            style,
            initial,
            initial_noiseblend,
            content_weight=5e0,
            content_layer_num=9,
            style_weight=5e2,
            style_layer_weight=(0.2, 0.2, 0.2, 0.2, 0.2),
            tv_weight=1e2,
            learning_rate=1e1,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-8,
            preserve_colors=False,
            pooling='max',
            iterations=1000,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape

    content_features = {}
    style_features = {}
    style_layers_weights = {}
    content_layer = CONTENT_LAYERS[content_layer_num]

    for i, style_layer in enumerate(STYLE_LAYERS):
        style_layers_weights[style_layer] = style_layer_weight[i]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)  # NOTE: `network` (path to the VGG weights) is assumed to be defined at module level
    image = tf.placeholder(tf.float32, shape=shape)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
    style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])

    # compute content features,style features in feedforward mode
    with tf.Session() as sess:
        content_features[content_layer] = sess.run(
            net[content_layer], feed_dict={image: content_pre})

        for layer in STYLE_LAYERS:
            features = sess.run(net[layer], feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    # make stylized image using backpropagation
    if initial is None:
        noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = tf.random_normal(shape) * 0.256
    else:
        initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
        initial = initial.astype(np.float32)
        noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = initial * (1 - initial_noiseblend) + (
            tf.random_normal(shape) * 0.256) * initial_noiseblend
    image = tf.Variable(initial)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    # content loss
    content_loss = content_weight * 2 * tf.nn.l2_loss(
        net[content_layer] -
        content_features[content_layer]) / content_features[content_layer].size

    # style loss
    style_loss = 0
    for style_layer in STYLE_LAYERS:
        layer = net[style_layer]
        _, height, width, number = map(lambda i: i.value, layer.get_shape())
        size = height * width * number
        feats = tf.reshape(layer, (-1, number))
        gram = tf.matmul(tf.transpose(feats), feats) / size
        style_gram = style_features[style_layer]
        style_loss += style_weight * style_layers_weights[
            style_layer] * 2 * tf.nn.l2_loss(gram -
                                             style_gram) / style_gram.size

    # total variation denoising
    tv_y_size = _tensor_size(image[:, 1:, :, :])
    tv_x_size = _tensor_size(image[:, :, 1:, :])
    tv_loss = tv_weight * 2 * (
        (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) /
         tv_y_size) +
        (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]) /
         tv_x_size))

    # overall loss
    loss = content_loss + style_loss + tv_loss

    # optimizer setup
    train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                        epsilon).minimize(loss)

    def print_progress():
        print('  content loss: %g\n' % content_loss.eval())
        print('    style loss: %g\n' % style_loss.eval())
        print('       tv loss: %g\n' % tv_loss.eval())
        print('    total loss: %g\n' % loss.eval())

    # optimization
    best_loss = float('inf')
    best = None
    images = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('Optimization started...\n')
        if (print_iterations and print_iterations != 0):
            print_progress()
        for i in range(iterations):
            train_step.run()

            last_step = (i == iterations - 1)
            if last_step or (print_iterations and i % print_iterations == 0):
                print('Iteration %4d/%4d\n' % (i + 1, iterations))
                print_progress()

            if (checkpoint_iterations
                    and i % checkpoint_iterations == 0) or last_step:
                this_loss = loss.eval()

                styled_image = np.clip(
                    vgg.unprocess(image.eval().reshape(shape[1:]),
                                  vgg_mean_pixel), 0, 255)

                if this_loss < best_loss:
                    best_loss = this_loss
                    best = styled_image

                if preserve_colors:
                    original_image = np.clip(content, 0, 255)

                    # Luminosity transfer steps:
                    # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                    # 2. Convert stylized grayscale into YUV (YCbCr)
                    # 3. Convert original image into YUV (YCbCr)
                    # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                    # 5. Convert recombined image from YUV back to RGB

                    # 1
                    styled_grayscale = rgb2gray(styled_image)
                    styled_grayscale_rgb = gray2rgb(styled_grayscale)

                    # 2
                    styled_grayscale_yuv = np.array(
                        Image.fromarray(styled_grayscale_rgb.astype(
                            np.uint8)).convert('YCbCr'))

                    # 3
                    original_yuv = np.array(
                        Image.fromarray(original_image.astype(
                            np.uint8)).convert('YCbCr'))

                    # 4
                    w, h, _ = original_image.shape
                    combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                    combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                    combined_yuv[..., 1] = original_yuv[..., 1]
                    combined_yuv[..., 2] = original_yuv[..., 2]

                    # 5
                    styled_image = np.array(
                        Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

                plt.figure(figsize=(8, 8))
                plt.imshow(styled_image.astype(np.uint8))
                plt.axis('off')
                plt.show()

                images.append(styled_image.astype(np.uint8))

    return images, best
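The total variation term penalizes differences between neighboring pixels. The same quantity can be checked outside TensorFlow; a minimal NumPy sketch (the array shape and tv_weight are made up):

import numpy as np

img = np.random.rand(1, 8, 8, 3).astype(np.float32)  # (batch, height, width, channels)
tv_weight = 1e2

def l2_loss(x):
    return np.sum(x ** 2) / 2  # same convention as tf.nn.l2_loss

# Differences between vertically and horizontally adjacent pixels,
# each normalized by the number of elements in the shifted slice.
tv_y = l2_loss(img[:, 1:, :, :] - img[:, :-1, :, :]) / img[:, 1:, :, :].size
tv_x = l2_loss(img[:, :, 1:, :] - img[:, :, :-1, :]) / img[:, :, 1:, :].size
tv_loss = tv_weight * 2 * (tv_y + tv_x)
print(tv_loss)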
Exemple #21
0
def stylize(network, initial, initial_noiseblend, content, styles, matte,
        preserve_colors, iterations, content_weight, content_weight_blend,
        style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        matte_weight, learning_rate, beta1, beta2, epsilon, pooling,
        output, dest_txt, dest_fig,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded       
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape                               # prepend 1 as the first dimension of content
    style_shapes = [(1,) + style.shape for style in styles]    # same for the style images
    content_features = {}                                      # dict for the content features
    style_features = [{} for _ in styles]                      # one dict per style image

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)         

    print('\n',vgg_mean_pixel.shape,'\n')

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight  # => relu1_1: 1 ; relu2_1: style_layer_weight_exp ; ... ; relu5_1: style_layer_weight_exp**4
        layer_weight *= style_layer_weight_exp            # (default: style_layer_weight_exp=1) => all ones

    # normalize style layer weights => sum=1
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum    # => a normalized 5-element weighting, shared by every style image

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:   # every op built in this context is placed on CPU:0 inside graph g;
                                                                     # "with Session" closes the session when done
        image = tf.placeholder('float', shape = shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)         # dict mapping each VGG19-LAYERS entry to a 4-D tensor
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})
        

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])   # returns the matrix style_image[i] - vgg_mean_pixel
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size   # matmul = matrix multiplication => gram = [features(transposed) x features] / features.size
                style_features[i][layer] = gram                          # style_features = list of dicts

    initial_content_noise_coeff = 1.0 - initial_noiseblend     # noiseblend is an optional input

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        #initial = tf.random_normal(shape) * 0                              # starting image = blank

        if initial is None:                                                     # initial = image from which the next image is built
            #noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256                           # no initial given => start from random noise
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])       # initial - mean_pixel
            initial = initial.astype('float32')
            #noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        # (default: initial_noiseblend=0) => initial is unchanged
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)


        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend      # default: content_weight_blend = 1  ==> ...['relu4_2'] = 1
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend                                    # ==> ...['relu5_2'] = 0

        content_loss = 0          # initialization is not strictly needed, but keeps the same form as the style loss
        content_losses = []
        for content_layer in CONTENT_LAYERS:              # CONTENT_LAYERS = ('relu4_2', 'relu5_2')
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(             # content_weight = alpha/2
                    net[content_layer] - content_features[content_layer]) / content_features[content_layer].size))         # content_losses = list of 2 elements
                    # net[content_layer] = features of the generated image; content_features[content_layer] = features of the original image
        content_loss += reduce(tf.add, content_losses)       # = sum over content_losses (the error on each layer, then summed)
        # (default: content_layers_weights['relu5_2'] = 0 => content_loss = content_losses[0])

        # style loss
        style_loss = 0
        for i in range(len(styles)):       # number of style images
            style_losses = []
            for style_layer in STYLE_LAYERS:             # STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
                layer = net[style_layer]
                _, height, width, number = map(lambda j: j.value, layer.get_shape())   # "_" => discard the first element of the tuple
                        # lambda maps j to j.value; map applies it to every element of layer.get_shape()
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))      # drops dim0 (=1); new shape = (dim1*dim2, number)
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]  # style_features = list of dicts filled in "compute style features in feedforward mode"
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size) # per-layer errors for image i
                # gram = style representation of the generated image; style_gram = style representation of the original image
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
            # style_loss update: reduce = sum over the layer errors of image i; style_weight = weight of style relative to content
            # style_blend_weights[i] = weight of image i relative to the other style images
            # += => the losses of all style images are summed

        # matting laplacian loss
        loader = np.load(matte)
        lcoo = csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                        shape=loader['shape']).tocoo()
        lindices = np.mat([lcoo.row, lcoo.col]).transpose()
        lvalues = tf.constant(lcoo.data,  dtype=tf.float32)
        laplacian = tf.SparseTensor(indices=lindices, values=lvalues, dense_shape=lcoo.shape)

        matte_loss = 0
        matte_losses = []
        for i in range(3):
            imr = tf.reshape(image[:,:,:,i], [-1, 1])
            matte_losses.append(
                tf.matmul(tf.transpose(imr),
                          tf.sparse_tensor_dense_matmul(laplacian, imr))[0][0]
            )
        matte_loss += matte_weight * reduce(tf.add, matte_losses)


        # total variation denoising                       (not very important: could be replaced by another loss?)
        print("\n total variation denoising")            # (the tv loss can be disabled with --tv-weight 0)

        tv_y_size = _tensor_size(image[:,1:,:,:])
        print(tv_y_size)
        tv_x_size = _tensor_size(image[:,:,1:,:])
        print(tv_x_size)
        print("\n")
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))


        # GAN loss (not implemented yet)

        # overall loss
        loss = content_loss + style_loss + matte_loss + tv_loss    # TODO: make the weighting coefficients (alpha, etc.) explicit

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)       # (operation that updates the variables so that the total loss is minimized)
        # which variables?
        
        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
           # stderr.write('    matte loss: %g\n' % matte_loss.eval())
           # stderr.write('      GAN loss: %g\n' % GAN_loss.eval())   # GAN_loss is not defined yet
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())
        
        
        
        # optimization
        best_loss = float('inf')                          # track the lowest total loss seen so far
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())    # initialize the global variables
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):     # if a print interval was given, report the losses before the first iteration
                print_progress()
                
            c_loss = []                # lists that will record the loss values
            s_loss = []
            t_loss = []
            tot_loss = []
            x=[i+1 for i in range(iterations)]   # x-axis values for the plots
            
            for i in range(iterations):
                
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()                                               # minimize the loss at each iteration

                c_loss.append(content_loss.eval())         # record each loss value at every iteration
                s_loss.append(style_loss.eval())
                t_loss.append(tv_loss.eval())
                tot_loss.append(loss.eval())

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):   # i % print_iterations = remainder of dividing i by print_iterations
                    print_progress()                                                # report the instantaneous losses every print_iterations iterations
                    if last_step:
                        if dest_txt is None:
                            l=len(output)-4                # create a file holding the losses (same name as the output, but .txt)
                            file=output[:l]
                            F=open(''.join([file,'.txt']),'x')      # joins file and '.txt'
                            F.writelines(['  content loss: %g\n' % content_loss.eval() , '    style loss: %g\n' % style_loss.eval() ,
                                          '       tv loss: %g\n' % tv_loss.eval() , '    total loss: %g\n' % loss.eval()])
                            F.close()
                        else:
                            F=open(dest_txt,'x')
                            F.writelines(['  content loss: %g\n' % content_loss.eval() , '    style loss: %g\n' % style_loss.eval() ,
                                          '       tv loss: %g\n' % tv_loss.eval() , '    total loss: %g\n' % loss.eval()])
                            F.close()
                        
                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:          
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()            # keep the image that achieves the best total loss

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)
                    
                    if preserve_colors:                                       # preserve-colors
                        original_image = np.clip(content, 0, 255)        # clip: values of content above 255 -> 255, below 0 -> 0
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
            
            
            # name of the file where the loss curves will be saved
            if dest_fig is None:
                l=len(output)-4
                file=output[:l]
                dest_fig=''.join([file,'_fig','.jpg'])

            print('dest_fig',dest_fig)


            # plot the loss curves
            plt.figure(1)
            plt.title("Different loss types - linear and semi-log plots")
            plt.subplot(2,1,1)
            plt.plot(x, c_loss, label='content_loss')
            plt.plot(x, s_loss, label='style_loss')
            plt.plot(x, t_loss, label='tv_loss')
            plt.plot(x, tot_loss, label='total_loss')
            plt.grid('on')
            plt.axis('tight')
            plt.legend()
            plt.ylabel('error')

            plt.subplot(2,1,2)
            plt.semilogy(x, c_loss, label='content_loss')
            plt.semilogy(x, s_loss, label='style_loss')
            plt.semilogy(x, t_loss, label='tv_loss')
            plt.semilogy(x, tot_loss, label='total_loss')
            plt.grid('on')
            plt.axis('tight')
            plt.xlabel("i (number of iterations)")
            plt.ylabel('error')
            plt.savefig(dest_fig)
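The matte loss above is a quadratic form v^T L v evaluated per color channel against a precomputed sparse matting Laplacian. A minimal SciPy sketch of the same computation (the random sparse matrix and image are stand-ins just to show the shapes; a real matting Laplacian would be positive semi-definite):

import numpy as np
import scipy.sparse as sp

h, w = 8, 8                                    # made-up image size
n = h * w
laplacian = sp.rand(n, n, density=0.05, format='csr')  # stand-in for the real matting Laplacian
image = np.random.rand(1, h, w, 3)

matte_loss = 0.0
for c in range(3):
    v = image[:, :, :, c].reshape(-1, 1)            # flatten one channel to a column vector
    matte_loss += (v.T @ (laplacian @ v)).item()    # quadratic form v^T L v
print(matte_loss)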
Exemple #22
0
def main():
    # This will print all array values in full
    np.set_printoptions(threshold=sys.maxsize)

    parser = build_parser()
    options = parser.parse_args()

    if not os.path.isfile(options.network):
        parser.error(
            "Network %s does not exist. (Did you forget to download it?)" %
            options.network)

    # Load the vgg weights in advance
    vgg_weights, vgg_mean_pixel = vgg.load_net(options.network)
    content_image = imread(options.content)

    # Jacob: moved this here since the same image features will be used for each style image
    content_features = {}
    g = tf.Graph()
    shape = (1, ) + content_image.shape
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, options.pooling)
        content_pre = np.array([vgg.preprocess(content_image, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    print("READY")
    sys.stdout.flush()  # Make sure Java can sense this output before Python blocks waiting for input
    count = 0
    #for style in style_images: # loop through separate style inputs individually
    for line in sys.stdin:
        # Assumes a single line of input will be a json for one image
        style = jsonimread(line)

        width = options.width
        if width is not None:
            new_shape = (int(
                math.floor(
                    float(content_image.shape[0]) / content_image.shape[1] *
                    width)), width)
            content_image = scipy.misc.imresize(content_image, new_shape)
        target_shape = content_image.shape
        # This batch of code was in a loop for each style input before
        style_scale = STYLE_SCALE
        if options.style_scales is not None:
            style_scale = options.style_scales[0]  # only one style per input line, so take the first scale
        style = scipy.misc.imresize(
            style, style_scale * target_shape[1] / style.shape[1])

        # Removed code for blending between multiple styles
        style_blend_weights = [1.0]

        initial = options.initial
        if initial is not None:
            initial = scipy.misc.imresize(imread(initial),
                                          content_image.shape[:2])
            # Initial guess is specified, but not noiseblend - no noise should be blended
            if options.initial_noiseblend is None:
                options.initial_noiseblend = 0.0
        else:
            # Neither initial nor noiseblend is provided; falling back to a randomly generated initial guess
            if options.initial_noiseblend is None:
                options.initial_noiseblend = 1.0
            if options.initial_noiseblend < 1.0:
                initial = content_image

        if options.checkpoint_output and "%s" not in options.checkpoint_output:
            parser.error("To save intermediate images, the checkpoint output "
                         "parameter must contain `%s` (e.g. `foo%s.jpg`)")

        for iteration, image in stylize(
                network=options.network,
                initial=initial,
                initial_noiseblend=options.initial_noiseblend,
                content=content_image,
                styles=[style],  # Changed this to be a list of only one style image
                preserve_colors=options.preserve_colors,
                iterations=options.iterations,
                content_weight=options.content_weight,
                content_weight_blend=options.content_weight_blend,
                style_weight=options.style_weight,
                style_layer_weight_exp=options.style_layer_weight_exp,
                style_blend_weights=style_blend_weights,
                tv_weight=options.tv_weight,
                learning_rate=options.learning_rate,
                beta1=options.beta1,
                beta2=options.beta2,
                epsilon=options.epsilon,
                pooling=options.pooling,
                print_iterations=options.print_iterations,
                checkpoint_iterations=options.checkpoint_iterations,
                # These vgg settings are now loaded only once
                vgg_weights=vgg_weights,
                vgg_mean_pixel=vgg_mean_pixel,
                content_features=content_features):
            output_file = None
            combined_rgb = image
            if iteration is not None:
                if options.checkpoint_output:
                    output_file = options.checkpoint_output % iteration
            else:
                # Change final output files to simply be numbered
                output_file = "%d.JPG" % count
                count = count + 1
            if output_file:
                # No longer save image to file
                #imsave(output_file, combined_rgb)
                # Output json String
                print(json.dumps(combined_rgb.tolist()))
                sys.stdout.flush()  # Make sure Java can sense this output before Python blocks waiting for input
    print("DONE")
Exemple #23
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)  # load the pretrained VGG-19 model


    layer_weight = 1.0  # layer weight defaults to 1
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight  # style_layer_weight_exp defaults to 1; larger values make the layer weights grow exponentially

        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()

    with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:  # train on the GPU: roughly 2 hours on CPU vs. about 5 minutes on GPU

        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)  # pooling defaults to max
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))  # flatten the feature maps into a 2-D (pixels, channels) matrix
                gram = np.matmul(features.T, features) / features.size  # compute the Gram matrix
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256  # start from random white noise
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)  # wrap the starting image in a TensorFlow variable
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        # higher layers of the network capture object and layout information; lower layers capture pixel-level detail
        # conv4_2 (relu4_2) is chosen in the end
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)
        # compute the content loss

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        # the total loss is the sum of the individual losses
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():  # print progress information
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(iteration_times[-10:]) * (iterations - i)
                    stderr.write('Iteration %4d/%4d (%s elapsed, %s remaining)\n' % (
                        i + 1,
                        iterations,
                        hms(elapsed),
                        hms(remaining)
                    ))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert the stylized image from RGB to grayscale
                        # 2. Convert the stylized grayscale image to YCbCr
                        # 3. Convert the original image to YCbCr
                        # 4. Recombine the channels
                        # 5. Finally convert back to RGB
                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
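This variant estimates the remaining time from the mean of the last ten iteration durations and formats it with an `hms` helper that is not shown in the snippet. A plausible sketch of such a helper (an assumption, not the example's actual code):

def hms(seconds):
    """Format a duration in seconds as h:mm:ss (hypothetical stand-in for the hms used above)."""
    seconds = int(seconds)
    hours, rest = divmod(seconds, 3600)
    minutes, secs = divmod(rest, 60)
    return '%d:%02d:%02d' % (hours, minutes, secs)

print(hms(3735))  # -> 1:02:15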
Exemple #24
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image);
    `iteration` is None if this is the final image (the last iteration).
    Otherwise tuples are yielded every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """

    # The shape information in the comment is based on the content image 1-content.jpg with shape (533, 400, 3)
    # and 1-style.jpg (316, 400, 3)
    # This should be changed with different images.

    shape = (1,) + content.shape                                # (1, 533, 400, 3)
    style_shapes = [(1,) + style.shape for style in styles]     # (1, 316, 400, 3)
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)         # Load the VGG-19 model.
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight        # {'relu1_1': 1.0, 'relu2_1': 1.0, 'relu3_1': 1.0, 'relu4_1': 1.0, 'relu5_1': 1.0}
        layer_weight *= style_layer_weight_exp                  # 1.0

    # VGG19 layers:
    # 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    # 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    # 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    # 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
    # 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4'

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:                            # ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
        layer_weights_sum += style_layers_weights[style_layer]  # 5.0
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum  # {'relu1_1': 0.2, 'relu2_1': 0.2, 'relu3_1': 0.2, 'relu4_1': 0.2, 'relu5_1': 0.2}

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)              # {'conv1_1': Tensor..., relu1_1: Tensor...}
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)]) # (1, 533, 400, 3) subtract with the mean pixel
        for layer in CONTENT_LAYERS:                                                  # (relu4_2, relu5_2)
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre}) # Find the feature values for (relu4_2, relu5_2)

    # compute style features in feed forward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])            # (1, 316, 400, 3)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:                                        # ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
                features = net[layer].eval(feed_dict={image: style_pre})      # For relu1_1 layer (1, 316, 400, 64)
                features = np.reshape(features, (-1, features.shape[3]))      # (126400, 64)
                gram = np.matmul(features.T, features) / features.size        # (64, 64) Gram matrix - measure the dependency of features.
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend                    # 0

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1) # Generate a random image with SD the same as the content image.
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:       # ('relu4_2', 'relu5_2')
            # Use MSE as content losses
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]            # For relu1_1: (1, 533, 400, 64)
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))                # (213200, 64)
                gram = tf.matmul(tf.transpose(feats), feats) / size    # Gram matrix for the features in relu1_1 for the result image.
                style_gram = style_features[i][style_layer]            # Gram matrix for the style
                # Style loss is the MSE for the difference of the 2 Gram matrix
                style_losses.append(style_layers_weights[style_layer]
                                    * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # Total variation denoising: Add cost to penalize neighboring pixel is very different.
        # This help to reduce noise.
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
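The color-preservation branch that recurs in these examples can be exercised on its own. A minimal standalone sketch of the five luminosity-transfer steps, assuming rgb2gray/gray2rgb follow the Rec.601 convention named in the comments (the 8x8 random images are made up):

import numpy as np
from PIL import Image

def rgb2gray(rgb):
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])  # Rec.601 luma

def gray2rgb(gray):
    return np.stack([gray] * 3, axis=-1)

original = np.random.randint(0, 256, (8, 8, 3)).astype(np.uint8)
styled = np.random.randint(0, 256, (8, 8, 3)).astype(np.uint8)

styled_gray_rgb = gray2rgb(rgb2gray(styled)).astype(np.uint8)
styled_yuv = np.array(Image.fromarray(styled_gray_rgb).convert('YCbCr'))
original_yuv = np.array(Image.fromarray(original).convert('YCbCr'))

combined = np.empty_like(original_yuv)
combined[..., 0] = styled_yuv[..., 0]      # luminance from the stylized image
combined[..., 1:] = original_yuv[..., 1:]  # chrominance from the original
out = np.array(Image.fromarray(combined, 'YCbCr').convert('RGB'))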
Exemple #25
0
CONTENT_LAYERS = ('relu4_2', 'relu5_2')
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')


# shape = (1, 399, 600, 3)
# style_shape = (1, 475, 600, 3)
shape = (1,) + content.shape
style_shape = (1,) + style.shape



#-------------------------------------------------------------
# loads a file of roughly 500 MB
# classes, layer, normalization 
# vgg_mean_pixel = ([123.68, 116.779, 103.939])
vgg_weights, vgg_mean_pixel = vgg.load_net('imagenet-vgg-verydeep-19.mat')


layer_weight = 1.0
style_layers_weights = {}

# relu1_1 through relu5_1 all get weight 1.0
for style_layer in STYLE_LAYERS:
    style_layers_weights[style_layer] = layer_weight
    layer_weight *= STYLE_LAYER_WEIGHT_EXP    
 

# normalize style layer weights
# relu1_1 through relu5_1 all become 0.2
layer_weights_sum = 0
for style_layer in STYLE_LAYERS:
Exemple #26
0
def main():
    '''Search for similar images

    Search the style directory for images that closely resemble each image in
    the content directory. Save those images in an output directory folder
    corresponding to each content image, renamed as their matching rank number.
    '''

    parser = build_parser()
    options = parser.parse_args()

    content_files = os.listdir(options.content_dir)
    content_images = [
        read_img(os.path.join(options.content_dir, f)) for f in content_files
    ]

    # n_content by n_style matrix and list to store the best style images
    n_content = len(content_files)
    n_total = n_content * options.n_style
    best_style_score = np.full((n_content, options.n_style), np.inf)
    best_style_file = np.array([['' for i in range(options.n_style)]
                                for h in range(n_content)],
                               dtype=object)

    vgg_weights, vgg_mean_pixel = vgg.load_net(options.network)

    content_features = [{} for _ in content_images]
    for i, c in enumerate(content_images):
        with tf.Graph().as_default(), tf.Session() as sess:
            image = tf.placeholder('float', shape=(1, ) + c.shape)
            net = vgg.net_preloaded(vgg_weights, image, 'max')
            content_pre = np.array([vgg.preprocess(c, vgg_mean_pixel)])
            for layer in CONTENT_LAYERS:
                content_features[i][layer] = net[layer].eval(
                    feed_dict={image: content_pre})

    final_style_score, final_style_file = search_dir(
        content_features, vgg_weights, vgg_mean_pixel, best_style_score,
        best_style_file, options.style_dir, options.recurse, options.n_search)

    if np.any(np.isinf(final_style_score)):
        inf_total = np.sum(np.isinf(final_style_score))
        print('%d out of %d style images not found.' % (inf_total, n_total),
              'Try rerunning with a smaller n-style.')
        raise RuntimeError('some style images were not found')

    sorted_files = final_style_file[np.indices(
        (n_content, options.n_style))[0],
                                    final_style_score.argsort()]

    format_str = '{0:0>%d}.{1}' % int(np.ceil(np.log10(n_total)))
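    # e.g. n_total = 100 gives format_str = '{0:0>2}.{1}', so rank 7 with a
    # jpg extension is saved as '07.jpg'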

    os.mkdir(options.output_dir)
    for i, f in enumerate(content_files):
        fname = ''.join(f.split('.')[:-1])
        print('Copying style files for %s' % fname)
        os.mkdir(os.path.join(options.output_dir, fname))
        for j in range(options.n_style):
            print(sorted_files[i, j])
            img_ext = sorted_files[i, j].split('.')[-1]
            shutil.copy(
                sorted_files[i, j],
                os.path.join(options.output_dir, fname,
                             format_str.format(j, img_ext)))
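# Hypothetical invocation of the search script above (the flag names are
# assumptions inferred from the options it reads):
#   python search.py --content-dir content/ --style-dir styles/ \
#       --output-dir matches/ --n-style 5 --network imagenet-vgg-verydeep-19.mat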
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image, loss_vals) at every
    iteration. However `image` and `loss_vals` are None by default. Each
    `checkpoint_iterations`, `image` is not None. Each `print_iterations`,
    `loss_vals` is not None.

    `loss_vals` is a dict with loss values for the current iteration, e.g.
    ``{'content': 1.23, 'style': 4.56, 'tv': 7.89, 'total': 13.68}``.

    :rtype: iterator[tuple[int,image,dict]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda d: d.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # total loss
        loss = content_loss + style_loss + tv_loss

        # We use OrderedDict to make sure we have the same order of loss types
        # (content, style, tv, total) as defined by the initial construction of
        # the loss_store dict. This is important for print_progress() and
        # saving loss_arrs (column order) in the main script.
        #
        # Subtle Gotcha (tested with Python 3.5): The syntax
        # OrderedDict(key1=val1, key2=val2, ...) does /not/ create the same
        # order since, apparently, it first creates a normal dict with random
        # order (< Python 3.7) and then wraps that in an OrderedDict. We have
        # to pass in a data structure which is already ordered. I'd call this a
        # bug, since both constructor syntax variants result in different
        # objects. In 3.6, the order is preserved in dict() in CPython, in 3.7
        # they finally made it part of the language spec. Thank you!
        loss_store = OrderedDict([('content', content_loss),
                                  ('style', style_loss), ('tv', tv_loss),
                                  ('total', loss)])
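        # For example (illustrative, not executed here): on Python < 3.6,
        #   OrderedDict(content=c, style=s, tv=t, total=l)
        # can come out in any order, because the kwargs are gathered into a
        # plain dict first, while the list-of-pairs form above is always
        # preserved in insertion order.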

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Optimization started...')
            if (print_iterations and print_iterations != 0):
                print_progress(get_loss_vals(loss_store))
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(
                        iteration_times[-10:]) * (iterations - i)
                    print('Iteration %4d/%4d (%s elapsed, %s remaining)' %
                          (i + 1, iterations, hms(elapsed), hms(remaining)))
                else:
                    print('Iteration %4d/%4d' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    loss_vals = get_loss_vals(loss_store)
                    print_progress(loss_vals)
                else:
                    loss_vals = None

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))
                else:
                    img_out = None

                yield i + 1 if last_step else i, img_out, loss_vals

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
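# A sketch of how a caller might drive the generator above. The file names and
# hyperparameter values are illustrative, and imread/imsave are assumed to be
# the module's own image helpers.
content = imread('content.jpg').astype(np.float32)
style = imread('style.jpg').astype(np.float32)

for iteration, image, loss_vals in stylize(
        network='imagenet-vgg-verydeep-19.mat',
        initial=None, initial_noiseblend=1.0,
        content=content, styles=[style],
        preserve_colors=False, iterations=1000,
        content_weight=5e0, content_weight_blend=1.0,
        style_weight=5e2, style_layer_weight_exp=1.0,
        style_blend_weights=[1.0], tv_weight=1e2,
        learning_rate=1e1, beta1=0.9, beta2=0.999, epsilon=1e-08,
        pooling='max', print_iterations=100, checkpoint_iterations=100):
    if loss_vals is not None:
        print({name: float(val) for name, val in loss_vals.items()})
    if image is not None:
        imsave('checkpoint-%d.jpg' % iteration, image)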
Exemple #28
0
def _vggnet(data_path, input_image):
    # load the pretrained VGG weights and the training-set mean pixel
    weights, mean_pixel = vgg.load_net(data_path)
    # subtract the mean pixel before feeding the image to the network
    image = vgg.preprocess(input_image, mean_pixel)
    # build the network graph on top of the preprocessed image
    net = vgg.net_preloaded(weights, image)
    return net
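# Hypothetical usage of _vggnet (the placeholder shape and layer name are
# assumptions, not part of the snippet):
#   input_image = tf.placeholder('float', shape=(1, 224, 224, 3))
#   net = _vggnet('imagenet-vgg-verydeep-19.mat', input_image)
#   features = net['relu4_2']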
Exemple #29
0
def main(argv):
    parser = build_parser()
    options = parser.parse_args(args=argv)

    # if not os.path.isfile(options.network):
    #     parser.error("Network %s does not exist. (Did you forget to download it?)" % options.network)

    content_image = imread(options.content)
    style_images = [imread(style) for style in options.styles]

    width = options.width
    if width is not None:
        new_shape = (int(
            math.floor(
                float(content_image.shape[0]) / content_image.shape[1] *
                width)), width)
        content_image = scipy.misc.imresize(content_image, new_shape)
    target_shape = content_image.shape
    for i in range(len(style_images)):
        style_scale = STYLE_SCALE
        if options.style_scales is not None:
            style_scale = options.style_scales[i]
        style_images[i] = scipy.misc.imresize(
            style_images[i],
            style_scale * target_shape[1] / style_images[i].shape[1])

    style_blend_weights = options.style_blend_weights
    if style_blend_weights is None:
        # default is equal weights
        style_blend_weights = [1.0 / len(style_images) for _ in style_images]
    else:
        total_blend_weight = sum(style_blend_weights)
        style_blend_weights = [
            weight / total_blend_weight for weight in style_blend_weights
        ]
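    # e.g. --style-blend-weights 3 1 1 is normalized to [0.6, 0.2, 0.2] here,
    # so the blend weights always sum to 1.0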

    initial = options.initial
    if initial is not None:
        initial = scipy.misc.imresize(imread(initial), content_image.shape[:2])
        # Initial guess is specified, but not noiseblend - no noise should be blended
        if options.initial_noiseblend is None:
            options.initial_noiseblend = 0.0
    else:
        # Neither initial nor noiseblend is provided; fall back to a randomly
        # generated initial guess
        if options.initial_noiseblend is None:
            options.initial_noiseblend = 1.0
        if options.initial_noiseblend < 1.0:
            initial = content_image

    if options.checkpoint_output and "%s" not in options.checkpoint_output:
        parser.error("To save intermediate images, the checkpoint output "
                     "parameter must contain `%s` (e.g. `foo%s.jpg`)")

    network = options.network
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    # original_width, original_height = content_image.shape[1], content_image.shape[0]
    # resized_content_image = imresize(content_image, FIRST_PASS_SIZE)
    # resized_style_images = list(map(lambda im: imresize(im, FIRST_PASS_SIZE), style_images))
    # first_pass = apply_style(options,
    #                          initial,
    #                          resized_content_image,
    #                          resized_style_images,
    #                          style_blend_weights,
    #                          options.iterations,
    #                          network,
    #                          vgg_weights,
    #                          vgg_mean_pixel)
    #
    # second_pass = skimage.transform.resize(first_pass, (original_height, original_width), anti_aliasing=True, preserve_range=True)
    final_img = apply_style(options, initial, content_image, style_images,
                            style_blend_weights, options.iterations, network,
                            vgg_weights, vgg_mean_pixel)

    imsave(options.output, final_img)
Exemple #30
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda d: d.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(iteration_times[-10:]) * (iterations - i)
                    stderr.write('Iteration %4d/%4d (%s elapsed, %s remaining)\n' % (
                        i + 1,
                        iterations,
                        hms(elapsed),
                        hms(remaining)
                    ))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
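# Side note: the Gram-matrix step used by the style branches above can be
# sanity-checked in plain NumPy; the shapes here are illustrative.
import numpy as np

features = np.random.rand(1, 4, 4, 8).astype(np.float32)  # 4x4 grid, 8 channels
flat = features.reshape(-1, features.shape[3])   # one row per spatial position
gram = np.matmul(flat.T, flat) / flat.size       # channel correlations, (8, 8)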
Exemple #31
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None,
        vgg_weights=None, vgg_mean_pixel=None, # Added so that they are not reloaded every time
        content_features=None): # Added so that they are not recomputed every time
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    style_features = [{} for _ in styles]

    # Added option to have the net pre-loaded before calling the method
    if vgg_weights is None or vgg_mean_pixel is None:
        vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # Jacob: These content features only need to be computed once, and can be reused for
    #        each new style image.
    # compute content features in feedforward mode
    if content_features is None:
        content_features = {}
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=shape)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
            for layer in CONTENT_LAYERS:
                content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda d: d.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
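# Side note: the "total variation denoising" term above is a smoothness
# regularizer. Since tf.nn.l2_loss(t) == sum(t ** 2) / 2 and the code
# multiplies by 2, the term is the mean squared difference between
# neighbouring pixels, scaled by tv_weight. In plain NumPy:
import numpy as np

img = np.random.rand(1, 8, 8, 3).astype(np.float32)
tv_y = np.sum((img[:, 1:, :, :] - img[:, :-1, :, :]) ** 2) / img[:, 1:, :, :].size
tv_x = np.sum((img[:, :, 1:, :] - img[:, :, :-1, :]) ** 2) / img[:, :, 1:, :].size
tv = tv_y + tv_x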
Exemple #32
0
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None,
            rContent=False,
            rStyle=False,
            label='label'):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """

    print(rContent, rStyle)
    print('Stylize Begin')

    # creates the shapes for the 'content' image and the array of 'style' images
    # the leading (1,) adds the batch dimension the network expects
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    print('Load VGG Network')
    # load the vgg image classification network....
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    # apply user defined weights to customize the style response.
    # through the user settings you can decrease the layer weights exponentially with a decay coefficient.
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    # so that sum(layer_weights) == 1
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    print('Compute Content')
    # content features are only needed when we are not rendering the style alone
    if rStyle == False:
        # compute content features in feedforward mode
        # This is effectively a constant during processing...
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=shape)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            #preprocess the input image
            content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
            # calculate the 'content' in each conv layer.
            # note: each eval() re-runs the graph, so evaluating the layers one
            # at a time recomputes the shared earlier layers; fetching them all
            # in a single sess.run() call would avoid that
            for layer in CONTENT_LAYERS:
                content_features[layer] = net[layer].eval(
                    feed_dict={image: content_pre})

    print('Compute Style')
    if rContent == False:

        # compute style features in feedforward mode
        # this again will be a constant in the routine...
        for i in range(len(styles)):
            g = tf.Graph()
            with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
                #set input placeholder shape
                image = tf.placeholder('float', shape=style_shapes[i])
                # load the vgg conv portion of the net
                net = vgg.net_preloaded(vgg_weights, image, pooling)
                # preprocess the input image
                style_pre = np.array(
                    [vgg.preprocess(styles[i], vgg_mean_pixel)])
                for layer in STYLE_LAYERS:
                    # feed in the image to each layer of the net
                    # note: each eval() re-runs the graph, so the earlier conv
                    # layers are recomputed once per style layer; a single
                    # sess.run() over all five layers would be faster
                    features = net[layer].eval(feed_dict={image: style_pre})
                    features = np.reshape(
                        features,
                        (-1, features.shape[3]))  # one row per spatial position
                    # calculate the gram.
                    gram = np.matmul(features.T, features) / features.size
                    style_features[i][layer] = gram

    # We now have the content tensor and the style tensor for our loss functions...
    # We can now 'train' our image...

    initial_content_noise_coeff = 1.0 - initial_noiseblend
    # make stylized image using backpropagation
    with tf.Graph().as_default():
        # we define our starting image (x)
        # try using the content image here and see
        # how the style progresses..
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) *
                                     0.1)  # note: this noise array is unused in this branch
            initial = tf.random_normal(shape) * 0.256
            # 0.256 is an empirical scale that keeps the random start small
        else:
            # noise up our input image...
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)

        # Load the net with the inital image as a VARIABLE.
        # This makes it a part of the optimization process!
        image = tf.Variable(initial, name='X')
        tf.summary.histogram("X", image)

        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss -> there are weights applied here that are not described in the paper...
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend
        content_loss = 0

        with tf.name_scope('Content_Loss'):

            if rStyle == True:
                content_loss = tf.constant(0, dtype='float32')
            else:
                # create calculate content loss operation.
                content_losses = []
                for content_layer in CONTENT_LAYERS:
                    content_losses.append(
                        content_layers_weights[content_layer] *
                        content_weight *
                        (2 * tf.nn.l2_loss(net[content_layer] -
                                           content_features[content_layer]) /
                         content_features[content_layer].size))
                content_loss += reduce(tf.add, content_losses)

        with tf.name_scope('Style_Loss'):
            if rContent == True:
                style_loss = tf.constant(0, dtype='float32')
            else:
                # create style loss operation
                # style loss
                style_loss = 0
                for i in range(len(styles)):
                    style_losses = []
                    for style_layer in STYLE_LAYERS:
                        layer = net[style_layer]
                        _, height, width, number = map(lambda d: d.value,
                                                       layer.get_shape())
                        size = height * width * number
                        feats = tf.reshape(layer, (-1, number))
                        gram = tf.matmul(tf.transpose(feats), feats) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(
                            style_layers_weights[style_layer] * 2 *
                            tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                    style_loss += style_weight * style_blend_weights[
                        i] * reduce(tf.add, style_losses)

        with tf.name_scope('TVD_Loss'):

            # total variation denoising: a smoothness regularizer that
            # penalizes differences between neighbouring pixels
            tv_y_size = _tensor_size(image[:, 1:, :, :])
            tv_x_size = _tensor_size(image[:, :, 1:, :])
            tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:, 1:, :, :] -
                               image[:, :shape[1] - 1, :, :]) / tv_y_size) +
                (tf.nn.l2_loss(image[:, :, 1:, :] -
                               image[:, :, :shape[2] - 1, :]) / tv_x_size))
            # create overall loss operation...
        with tf.name_scope('Loss'):
            loss = content_loss + style_loss + tv_loss

        tf.summary.scalar('Loss', loss)
        tf.summary.scalar('TV_Loss', tv_loss)
        tf.summary.scalar('Style_Loss', style_loss)
        tf.summary.scalar('Content_Loss', content_loss)
        summ = tf.summary.merge_all()
        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(os.path.join(LOGDIR, label))
            writer.add_graph(sess.graph)

            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                # run a train step...

                #if i % 1 == 0:
                #[s] = sess.run([summ])
                #writer.add_summary(s)
                _, s = sess.run([train_step, summ])
                #[s] = train_step.run([summ])
                writer.add_summary(s)

                #output anything needed for user....

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    # calculate loss
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        # cache the best image found so far
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    #lName = '%s%d' % (label ,i)
                    #im_sum = tf.summary.image(lName, img_out, 1)
                    #writer.add_summary(im_sum)
                    yield ((None if last_step else i), img_out)