Example #1
def optimize(content_targets,
             style_target,
             content_weight,
             style_weight,
             tv_weight,
             vgg_path,
             epochs=2,
             print_iterations=1000,
             batch_size=4,
             save_path='saver/fns.ckpt',
             slow=False,
             learning_rate=1e-3,
             debug=False):
    if slow:
        batch_size = 1

    mod = len(content_targets) % batch_size
    if mod > 0:
        content_targets = content_targets[:-mod]
        print("Train set has been trimmed down to %d" % (len(content_targets)))

    style_features = {}

    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1, ) + style_target.shape
    print("style_shape is", style_shape)

    # precompute style features
    print("Precomputing style features")
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    print("Computing content features")
    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256)
            preds_pre = preds
        else:
            preds = transform.net(X_content / 255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add,
                                                     style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        delta_time = 60
        num_examples = len(content_targets)
        iterations_per_epoch = num_examples // batch_size
        total_iterations = iterations_per_epoch * epochs
        iterations_completed = 0

        for epoch in range(epochs):
            print("Starting epoch %d of %d" % (epoch + 1, epochs))

            iterations = 0
            while iterations * batch_size < num_examples:
                time_remaining = delta_time * (total_iterations -
                                               iterations_completed)
                print(
                    "Epoch %d/%d iteration %d/%d (completed: %d/%d, %.2fs/iter).  %0.2f hours left"
                    % (epoch + 1, epochs, iterations + 1, iterations_per_epoch,
                       iterations_completed, total_iterations, delta_time,
                       time_remaining / (60 * 60)))

                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p,
                                         (256, 256, 3)).astype(np.float32)

                iterations += 1
                iterations_completed += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
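
Since optimize() is a generator, a caller has to drain it for training to run. A minimal driver sketch; the file paths, loss weights, and print format are illustrative assumptions (get_img is the same helper the function itself uses):

style_image = get_img('styles/wave.jpg')
content_paths = ['train/img_%05d.jpg' % i for i in range(40000)]

for preds, losses, iteration, epoch in optimize(content_paths,
                                                style_image,
                                                content_weight=7.5,
                                                style_weight=100.0,
                                                tv_weight=200.0,
                                                vgg_path='imagenet-vgg-verydeep-19.mat'):
    _style_loss, _content_loss, _tv_loss, _loss = losses
    print('epoch %d, iteration %d: total loss %g' % (epoch, iteration, _loss))
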
Example #2
def stylize(content, styles, network_path='imagenet-vgg-verydeep-19.mat', iterations=1000,
        content_weight=5e0, content_weight_blend=1, style_weight=5e2, style_layer_weight_exp=1, style_blend_weights=None, tv_weight=1e2,
        learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, pooling='avg',
        print_iterations=100, checkpoint_iterations=100, checkpoint_path=None, output_path=None):

    """
    This is a function to stylelize images,
    given the content image, list of style images, path to the network and all the hypter parameters.

    Returns
    -------
    stylized_img : np.ndarray
        N x H x W x C image.
    """
    # calculate the shape of the network input tensor according to the content image
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network_path)

    # scale the importance of each style layer according to its depth (deeper layers are more important if style_layer_weight_exp > 1; default = 1)
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features of the content image by feeding it into the network
    # TODO: why put the graph on the CPU? What is the high-level idea of content_features?
    g = tf.Graph()
    with g.as_default(), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features of each style image by feeding it into the network
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram


    # make stylized image using backpropagation
    # if the user doesn't specify an input image, start with noise
    # TODO: where does the number 0.256 come from?
    with tf.Graph().as_default():

        initial = tf.random_normal(shape) * 0.256

        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss, we can adjust the weight of each CONTENT_LAYERS
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # We can specify different weight for different style images
        if style_blend_weights is None:
            # default is equal weights
            style_blend_weights = [1.0/len(styles) for _ in styles]
        else:
            total_blend_weight = sum(style_blend_weights)
            # normalization
            style_blend_weights = [weight/total_blend_weight
                                   for weight in style_blend_weights]
        # style loss
        style_loss = 0
        # iterate to calculate the style loss over multiple style images
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)


        # total variation denoising, according to the paper
        # Mahendran, Aravindh, and Andrea Vedaldi. "Understanding deep image representations by inverting them."
        # Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    output_file = None
                    if not last_step:
                        if checkpoint_path:
                            output_file = checkpoint_path % i
                    else:
                        output_file = output_path

                    if output_file:
                        imsave(output_file, img_out)
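
Every example in this section builds its style targets with the same Gram construction; a standalone NumPy sketch of the precompute step above, with illustrative shapes:

import numpy as np

# A layer activation of shape (1, H, W, C) is flattened to (H*W, C); the Gram
# matrix F^T F / (H*W*C) then has shape (C, C).
features = np.random.rand(1, 32, 32, 64).astype(np.float32)
flat = np.reshape(features, (-1, features.shape[3]))  # (1024, 64)
gram = np.matmul(flat.T, flat) / flat.size            # flat.size == H*W*C
print(gram.shape)  # (64, 64)
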
Example #3
def stylize(network,
            content,
            styles,
            shape,
            iterations,
            content_weight=5.0,
            style_weight=100.0,
            tv_weight=100.0,
            style_blend_weights=None,
            learning_rate=10.0,
            initial=None,
            use_mrf=False,
            use_semantic_masks=False,
            mask_resize_as_feature=True,
            output_semantic_mask=None,
            style_semantic_masks=None,
            semantic_masks_weight=1.0,
            print_iterations=None,
            checkpoint_iterations=None,
            semantic_masks_num_layers=4,
            content_img_style_weight_mask=None):
    # type: (str, Union[None,np.ndarray], List[np.ndarray], Tuple[int,int,int,int], int, float, float, float, Union[None,List[float]], float, Union[None,np.ndarray], bool, bool, bool, Union[None,np.ndarray], Union[None,List[np.ndarray]], float, Union[None,int], Union[None,int], int, Union[None,np.ndarray]) -> Iterable[Tuple[Union[None,int],np.ndarray]]
    """
    Stylize images.
    :param network: Path to pretrained vgg19 network. It can be downloaded at
    http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat
    :param content: The content image. If left blank, it will enter texture generation mode (style synthesis without
    content loss).
    :param styles: A list of style images as numpy arrays.
    :param shape: The shape of the output image. It should have the format (1, height, width, 3).
    :param iterations: The number of iterations to run.
    :param content_weight: The weight for content loss. The larger the weight, the more the output will look like
    the content image.
    :param style_weight: The weight for style loss. The larger the weight, the more the output will have a style that
    looks like the style images.
    :param tv_weight: The weight for total-variation loss. The larger the weight, the smoother the output will be.
    :param style_blend_weights: If inputting multiple style images, this controls the balance between their styles.
    If left as None, it will treat all style images as equal.
    :param learning_rate: As the name suggests.
    :param initial: The initial starting point for the output. If left blank, the initial would just be noise.
    :param use_mrf: Whether we use a Markov random field (MRF) loss instead of the Gram loss. mrf_util.py contains more info.
    :param use_semantic_masks: Whether we use semantic masks as additional semantic information. Please check the paper
    "Semantic Style Transfer and Turning Two-Bit Doodles into Fine Artworks" for more information.
    :param mask_resize_as_feature: If true, resize the mask and use the resized mask as additional feature besides the
    vgg network layers. If false, pass the masks (must have exactly 3 masks) into the vgg network and use the outputted
    layers as additional features.
    :param output_semantic_mask: The semantic masks you would like to apply to the output image. The mask should have
    shape (batch_size, height, width, semantic_masks_num_layers). Unlike the neural doodle paper, here I use one
    black-and-white image for each semantic mask (the paper had semantic masks represented as rgb images, limiting the
    semantic channels to 3).
    :param style_semantic_masks: A list of semantic masks you would like to apply to each style image. The mask should
    have shape (batch_size, height, width, semantic_masks_num_layers)
    :param semantic_masks_weight: How heavily you'd like to weight the semantic masks as compared to other sources of
    semantic information obtained through passing the image through vgg network. Default is 1.0.
    :param print_iterations: Print loss information every n iterations.
    :param checkpoint_iterations: Save a checkpoint as well as the best image so far every n iterations.
    :param semantic_masks_num_layers: The number of semantic masks each image has.
    :param content_img_style_weight_mask: One black-and-white mask specifying how much we should "stylize" each pixel
    in the outputted image. The areas where the mask has higher value would be stylized more than other areas. A
    completely white mask would mean that we stylize the output image just as before, while a completely dark mask
    would mean that we do not stylize the output image at all, so it should look pretty much the same as content image.
    If you do not wish to use this feature, just leave it as None.
    :return: a tuple where the first item is either the current iteration or None, indicating training has finished.
    The second item is the image with the lowest loss so far. The tuples are yielded every 'checkpoint_iterations'
    iterations as well as at the last iteration.
    :rtype: iterator[tuple[int|None,image]]
    """
    global STYLE_LAYERS
    if content is not None:
        STYLE_LAYERS = STYLE_LAYERS_WITH_CONTENT
    if use_mrf:
        STYLE_LAYERS = STYLE_LAYERS_MRF  # Easiest way to be compatible with no-mrf versions.
    if use_semantic_masks:
        assert semantic_masks_weight is not None
        assert output_semantic_mask is not None
        assert style_semantic_masks is not None
    if content_img_style_weight_mask is not None:
        if shape[1] != content_img_style_weight_mask.shape[1] or shape[
                2] != content_img_style_weight_mask.shape[2]:
            raise AssertionError(
                "The shape of style_weight_mask is incorrect. It must have the same height and width "
                "as the output image. The output image has shape: %s and the style weight mask has "
                "shape: %s" %
                (str(shape), str(content_img_style_weight_mask.shape)))
        if content_img_style_weight_mask.dtype != np.float32:
            raise AssertionError(
                'The dtype of style_weight_mask must be float32. it is now %s'
                % str(content_img_style_weight_mask.dtype))
    if len(styles) == 0:
        raise AssertionError("Must feed in at least one style image.")

    # Prepend a (1,) to the shape of each style image, so style_shapes contains (1, height, width, 3).
    # 3 corresponds to rgb.
    style_shapes = [(1, ) + style.shape for style in styles]
    if style_blend_weights is None:
        style_blend_weights = [1.0 / len(styles) for _ in styles]
    content_features = {}
    style_features = [{} for _ in styles]
    output_semantic_mask_features = {}

    # By default tensorflow allocates all gpu memory. Here it is set to allocate only as much gpu memory
    # as it needs.
    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
        vgg_data, mean_pixel = vgg.read_net(network)

        # Compute content features in feed-forward mode
        content_image = tf.placeholder('float',
                                       shape=shape,
                                       name='content_image')
        net = vgg.pre_read_net(vgg_data, content_image)
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER]
        net_layer_sizes = vgg.get_net_layer_sizes(net)

        if content is not None:
            content_pre = np.array([vgg.preprocess(content, mean_pixel)])

        # Compute style features in feed-forward mode.
        if content_img_style_weight_mask is not None:
            style_weight_mask_layer_dict = neural_doodle_util.masks_average_pool(
                content_img_style_weight_mask)

        for i in range(len(styles)):
            # Using precompute_image_features, which calculates on the cpu and thus allows larger images.
            style_features[i] = neural_util.precompute_image_features(
                styles[i], STYLE_LAYERS, style_shapes[i], vgg_data, mean_pixel,
                use_mrf, use_semantic_masks)

        if use_semantic_masks:
            output_semantic_mask_features, style_features, content_semantic_mask, style_semantic_masks_images = neural_doodle_util.construct_masks_and_features(
                style_semantic_masks,
                styles,
                style_features,
                shape[0],
                shape[1],
                shape[2],
                semantic_masks_num_layers,
                STYLE_LAYERS,
                net_layer_sizes,
                semantic_masks_weight,
                vgg_data,
                mean_pixel,
                mask_resize_as_feature,
                use_mrf,
                average_pool=False
            )  # TODO: average pool is not working so well in practice??

        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        _, height, width, number = map(
            lambda i: i.value, content_features[CONTENT_LAYER].get_shape())
        content_features_size = height * width * number
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                                         content_features_size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                if content_img_style_weight_mask is not None:
                    # Apply the style weight mask to the feature layer, then normalize with the average of that style weight mask.
                    layer = neural_doodle_util.vgg_layer_dot_mask(style_weight_mask_layer_dict[style_layer], layer) \
                            / (tf.reduce_mean(style_weight_mask_layer_dict[style_layer]) + 0.000001)

                if use_mrf:
                    if use_semantic_masks:
                        # TODO: Compare the effect of concatenate masks to vgg layers versus dotting them with vgg
                        # layers. If you change this to dot, don't forget to also change that in neural_doodle_util.
                        layer = neural_doodle_util.concatenate_mask_layer_tf(
                            output_semantic_mask_features[style_layer], layer)
                        # layer = neural_doodle_util.vgg_layer_dot_mask(output_semantic_mask_features[style_layer], layer)
                    style_losses.append(
                        mrf_loss(style_features[i][style_layer],
                                 layer,
                                 name='%d%s' % (i, style_layer)))
                else:
                    if use_semantic_masks:
                        gram = neural_doodle_util.gramian_with_mask(
                            layer, output_semantic_mask_features[style_layer])
                    else:
                        gram = neural_util.gramian(layer)
                    style_gram = style_features[i][style_layer]
                    style_gram_size = get_np_array_num_elements(style_gram)
                    style_losses.append(
                        tf.nn.l2_loss(gram - style_gram) / style_gram_size
                    )  # TODO: Check normalization constants. the style loss is way too big compared to the other two.
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)
        # total variation denoising
        tv_loss = tf.mul(neural_util.total_variation(image), tv_weight)

        # overall loss
        if content is None:  # If we are doing style/texture regeneration only.
            loss = style_loss + tv_loss
        else:
            loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, feed_dict, last=False):
            stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
            if last or (print_iterations is not None and print_iterations != 0
                        and i % print_iterations == 0):
                if content is not None:
                    stderr.write('  content loss: %g\n' %
                                 content_loss.eval(feed_dict=feed_dict))
                stderr.write('    style loss: %g\n' %
                             style_loss.eval(feed_dict=feed_dict))
                stderr.write('       tv loss: %g\n' %
                             tv_loss.eval(feed_dict=feed_dict))
                stderr.write('    total loss: %g\n' %
                             loss.eval(feed_dict=feed_dict))

        # optimization
        best_loss = float('inf')
        best = np.zeros(shape=shape)
        feed_dict = {}
        if content is not None:
            feed_dict[content_image] = content_pre
        if use_semantic_masks:
            feed_dict[content_semantic_mask] = output_semantic_mask
            for styles_iter in range(len(styles)):
                feed_dict[style_semantic_masks_images[
                    styles_iter]] = style_semantic_masks[styles_iter]
        sess.run(tf.initialize_all_variables(), feed_dict=feed_dict)
        for i in range(iterations):
            last_step = (i == iterations - 1)
            print_progress(i, feed_dict, last=last_step)
            train_step.run(feed_dict=feed_dict)

            if (checkpoint_iterations
                    and i % checkpoint_iterations == 0) or last_step:
                this_loss = loss.eval(feed_dict=feed_dict)
                if this_loss < best_loss:
                    best_loss = this_loss
                    best = image.eval()
                yield ((None if last_step else i),
                       vgg.unprocess(best.reshape(shape[1:]), mean_pixel))
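
A minimal sketch of driving this generator; the image files and the scipy-style imread/imsave helpers are assumptions, not part of the function above:

content = imread('content.jpg').astype(np.float32)
styles = [imread('style.jpg').astype(np.float32)]
output_shape = (1, content.shape[0], content.shape[1], 3)

for step, img in stylize('imagenet-vgg-verydeep-19.mat', content, styles,
                         output_shape, iterations=1000,
                         checkpoint_iterations=100):
    name = 'final.jpg' if step is None else 'checkpoint_%d.jpg' % step
    imsave(name, np.clip(img, 0, 255).astype(np.uint8))
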
Example #4
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)  # load the pretrained vgg19 model


    layer_weight = 1.0  # per-layer weight defaults to 1
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        # style_layer_weight_exp defaults to 1 (equal weights); larger values make
        # deeper layers exponentially more important
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()

    with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:  # train on the gpu: roughly 5 minutes, versus about 2 hours on the cpu

        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)  # pooling defaults to max
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))  # flatten the (1, H, W, C) activations to (H*W, C)
                gram = np.matmul(features.T, features) / features.size  # compute the gram matrix
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256  # start from random white noise
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)  # wrap the initial image in a tensorflow variable
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        # higher layers of the network capture objects and layout; lower layers capture pixel-level detail
        # in the end relu4_2 (conv4_2 in the paper) is chosen as the main content layer
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)
        # accumulate the content loss

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        # the total loss is the sum of the three terms
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():  # print loss information
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(iteration_times[-10:]) * (iterations - i)
                    stderr.write('Iteration %4d/%4d (%s elapsed, %s remaining)\n' % (
                        i + 1,
                        iterations,
                        hms(elapsed),
                        hms(remaining)
                    ))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert the stylized RGB image to grayscale
                        # 2. Convert the stylized grayscale image to YCbCr
                        # 3. Convert the original image to YCbCr
                        # 4. Recombine (stylized Y, original Cb, original Cr)
                        # 5. Convert the recombined image back to RGB
                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
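
The preserve_colors branch calls rgb2gray and gray2rgb helpers that this excerpt never defines; a plausible definition, assuming the Rec.601 luma weights named in the comments of the last example below:

def rgb2gray(rgb):
    # Rec.601 luma weights
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

def gray2rgb(gray):
    # replicate a single grayscale channel into all three RGB channels
    w, h = gray.shape
    rgb = np.empty((w, h, 3), dtype=np.float32)
    rgb[:, :, 0] = rgb[:, :, 1] = rgb[:, :, 2] = gray
    return rgb
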
Example #5
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:', features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                print('Final features shape:', features.shape)
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape:', gram.shape)
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print('Height, width, number:', height, width, number)
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                if normal_flag == 0:  # normal_flag is assumed to be a module-level switch
                    # build a per-channel binary mask from a grayscale mask image
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('bottle_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:, d] = maskt
                    print('Mask shape', mask.shape)
                    if i == 0:
                        # first style image: restrict the style loss to the masked region
                        mask = tf.constant(mask)
                        feats = tf.mul(feats, mask)

                        gram = tf.matmul(tf.transpose(feats), feats) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                    else:
                        # other style images: use the complement of the mask
                        mask2 = (mask < 1).astype(np.float32)
                        feats2 = tf.mul(feats, mask2)
                        gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)
                else:
                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)



            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        if normal_flag != 0:
            print("general mask:")
            maskt = np.reshape(imread('bottle_mask.jpg').astype(np.float32), (height*width,))
            maskt = maskt > 100
            print('Mask shape', maskt.shape)
            # stack the mask into 3 channels, giving shape (height, width, 3)
            maskt = maskt.reshape((height, width))
            maskt = np.array([maskt, maskt, maskt])
            maskt = maskt.transpose((1, 2, 0))
            mask = tf.constant(maskt, dtype=tf.float32)

        def capper(grad, var, mask):
            # mask the gradient so that only pixels inside the mask are updated
            print("orig shape", grad)
            print("mask", mask)
            g = tf.mul(grad, mask)
            return g, var


        # optimizer setup: compute the gradients explicitly so they can be masked
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        # grads_and_vars is a list of (gradient, variable) tuples
        grads_and_vars = optimizer.compute_gradients(loss)
        # mask ("cap") each gradient, then apply the result
        capped_grads_and_vars = [capper(gv[0], gv[1], mask) for gv in grads_and_vars]
        train_step = optimizer.apply_gradients(capped_grads_and_vars)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if (i is not None and i % print_iterations == 0) or last:
                    stderr.write('  content loss: %g\n' % content_loss.eval())
                    stderr.write('    style loss: %g\n' % style_loss.eval())
                    stderr.write('       tv loss: %g\n' % tv_loss.eval())
                    stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))

                train_step.run()

                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)

                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)

            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
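
Stripped of the debugging code, the gradient capping in this example is the standard compute_gradients/apply_gradients pattern; a minimal sketch, assuming loss and a float32 mask broadcastable against the image gradient already exist:

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
# zero the gradient outside the mask so only masked pixels get updated
masked_grads_and_vars = [(tf.mul(grad, mask), var)
                         for grad, var in grads_and_vars]
train_step = optimizer.apply_gradients(masked_grads_and_vars)
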
Example #6
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        def print_progress(i, last=False):
            global timenow
            stderr.write('Iteration %d/%d, time: %dms\n' % (i + 1, iterations, current_milli_time() - timenow))
            timenow = current_milli_time()
            if last or (print_iterations and i % print_iterations == 0):
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i, last=last_step)
                train_step.run()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield (
                        (None if last_step else i),
                        vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    )
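
print_progress in this example reads a module-level timenow and a current_milli_time helper that the excerpt does not define; a plausible sketch:

import time

timenow = 0  # module-level timestamp, updated by print_progress above

def current_milli_time():
    # wall-clock time in milliseconds, matching the '%dms' format string
    return int(round(time.time() * 1000))
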
Example #7
def model_neural_style(pre_train_vgg_path,
                       content_image,
                       style_images,
                       content_weight=5e0,
                       content_weight_blend=1.0,
                       style_weight=5e2,
                       style_layer_weight_exp=1.0,
                       style_blend_weights=None,
                       pooling='',
                       initial=None,
                       initial_noiseblend=1.0,
                       tv_weight=1e2,
                       learning_rate=1e1,
                       beta1=0.9,
                       beta2=0.999,
                       epsilon=1e-08,
                       print_iterations=None,
                       iterations=500,
                       checkpoint_iterations=50,
                       preserve_colors=None):
    print "++++++++++++++++++++"
    # input shape of model
    shape = (1, ) + content_image.shape
    style_images_shapes = [(1, ) + style_image.shape
                           for style_image in style_images]
    content_features = {}
    style_features = [{} for _ in style_images]
    if style_blend_weights is None:
        # default: weight all style images equally
        style_blend_weights = [1.0 / len(style_images) for _ in style_images]

    # load the weights of pretrained vgg model
    vgg_weights, vgg_mean_pixel = vgg.load_weights(pre_train_vgg_path)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_infer(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content_image, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})
    # compute style features in feedforward mode
    for i in range(len(style_images)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_images_shapes[i])
            net = vgg.net_infer(vgg_weights, image, pooling)
            style_pre = np.array(
                [vgg.preprocess(style_images[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape,
                                     scale=np.std(content_image) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape,
                                     scale=np.std(content_image) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_infer(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)
        # style loss
        style_loss = 0
        for i in range(len(style_images)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()
                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content_image, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
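
Note: rgb2gray and gray2rgb are not defined in this excerpt. A minimal sketch consistent with the Rec.601 luma comment above (coefficients 0.299, 0.587, 0.114) could look like this:

import numpy as np

def rgb2gray(rgb):
    # Rec.601 luma: Y = 0.299 R + 0.587 G + 0.114 B
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

def gray2rgb(gray):
    # replicate the single luma channel into three identical RGB channels
    w, h = gray.shape
    rgb = np.empty((w, h, 3), dtype=np.float32)
    rgb[:, :, 0] = rgb[:, :, 1] = rgb[:, :, 2] = gray
    return rgb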
Beispiel #8
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
            content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
            learning_rate, beta1, beta2, epsilon, pooling, exp_sigma, mat_sigma, mat_rho, text_to_print,
            print_iterations=None, checkpoint_iterations=None, kernel=3, d=2, gamma_rho=1, gamma=1, rational_rho=1,
            alpha=1, v=1.5):
    """
    Stylize images.

    This function yields tuples (iteration, image, losses); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    `v` is the Matern smoothness (0.5, 1.5 or 2.5), used when kernel == 2.

    :rtype: iterator[tuple[int|None,image,dict]]

    Kernel selection for the Gram computation:
    0 - dot product kernel
    1 - exponential kernel
    2 - Matern kernel
    3 - polynomial kernel
    4 - gamma-exponential kernel
    5 - rational quadratic kernel
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))

                if kernel == 0:
                    gram2 = np.matmul(features.T, features) / features.size  # dot product kernel
                elif kernel == 1:
                    gram2 = gramSquaredExp_np(features, exp_sigma) / features.size  # exponential kernel
                elif kernel == 2:
                    gram2 = gramMatten_np(features, mat_sigma, v, mat_rho) / features.size  # Matern kernel
                elif kernel == 3:
                    gram2 = gramPoly_np(features, C=0, d=d) / features.size  # polynomial kernel
                elif kernel == 4:
                    gram2 = gramGammaExp_np(features, gamma_rho, gamma) / features.size  # gamma-exponential kernel
                elif kernel == 5:
                    gram2 = gramRatioanlQuad_np(features, rational_rho, alpha) / features.size  # rational quadratic kernel

                style_features[i][layer] = gram2

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    g = tf.Graph()
    with g.as_default(), g.device('/gpu'):
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                net[content_layer] - content_features[content_layer]) /
                content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))

                style_gram = style_features[i][style_layer]

                dim = feats.get_shape()

                # squared column norms, then pairwise squared distances between
                # feature columns; relu clamps small negatives from round-off
                sqr = tf.reduce_sum(tf.transpose(feats) * tf.transpose(feats), axis=1)
                d2 = tf.nn.relu(tf.transpose(tf.ones([dim[1], dim[1]]) * sqr) + tf.ones([dim[1], dim[1]]) * sqr - 2 * tf.matmul(tf.transpose(feats), feats))

                if kernel == 0:
                    # dot product kernel
                    gram = tf.matmul(tf.transpose(feats), feats) / size
                elif kernel == 1:
                    # exponential kernel
                    gram = tf.exp(-d2 / (2 * exp_sigma * exp_sigma)) / size
                elif kernel == 2:
                    # Matern kernel with smoothness v
                    if v == 0.5:
                        gram = mat_sigma**2 * tf.exp(-tf.sqrt(d2) / mat_rho) / size
                    elif v == 1.5:
                        gram = mat_sigma**2 * (tf.ones([dim[1], dim[1]]) + tf.sqrt(3.0) * tf.sqrt(d2) / mat_rho) * tf.exp(-tf.sqrt(3.0) * tf.sqrt(d2) / mat_rho) / size
                    elif v == 2.5:
                        gram = mat_sigma**2 * (tf.ones([dim[1], dim[1]]) + tf.sqrt(5.0) * tf.sqrt(d2) / mat_rho + 5 * d2 / 3 / (mat_rho**2)) * tf.exp(-tf.sqrt(5.0) * tf.sqrt(d2) / mat_rho) / size
                elif kernel == 3:
                    # polynomial kernel
                    gram = (tf.matmul(tf.transpose(feats), feats))**d / size
                elif kernel == 4:
                    # gamma-exponential kernel
                    gram = tf.exp(-(tf.sqrt(d2) / gamma_rho)**gamma) / size
                elif kernel == 5:
                    # rational quadratic kernel
                    gram = (1 + (d2 / rational_rho**2 / 2 / alpha))**(-alpha) / size

                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)

            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])

        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) /
             tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]) /
             tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        # train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress(last_loss):
            new_loss = loss.eval()
            stderr.write('file ===>  %s \n' % text_to_print)
            stderr.write('  content loss: %1.3e \t' % content_loss.eval())
            stderr.write('    style loss: %1.3e \t' % style_loss.eval())
            stderr.write('       tv loss: %1.3e \t' % tv_loss.eval())
            stderr.write('    total loss: %1.3e \t' % new_loss)
            stderr.write('    loss difference: %1.3e \t\n' % (last_loss - new_loss))
            return new_loss

        def save_progress():
            return {"content loss": content_loss.eval(), "style loss": style_loss.eval(),
                    "tv loss": tv_loss.eval(), "total loss": loss.eval()}

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            new_loss = 0
            # if (print_iterations and print_iterations != 0):
            #     print_progress()
            for i in range(iterations):
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    new_loss = print_progress(new_loss)

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    loss_dict = save_progress()
                    this_loss = loss.eval()
                    print(this_loss, "loss at this checkpoint")
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    try:
                        img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)
                    except Exception:
                        print("unable to build the result image with the given parameters")
                        img_out = None

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

                    yield (
                        (None if last_step else i),
                        img_out, loss_dict
                    )
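
The NumPy kernel helpers (gramSquaredExp_np, gramMatten_np, gramPoly_np, gramGammaExp_np, gramRatioanlQuad_np) are not included in this excerpt. A plausible sketch of the exponential variant, mirroring the kernel == 1 formula used in the TensorFlow style-loss branch above, might be:

import numpy as np

def gramSquaredExp_np(features, sigma):
    # features: (H*W, C); the kernel compares feature columns pairwise.
    # k(x, y) = exp(-||x - y||^2 / (2 sigma^2)), with ||x - y||^2 expanded
    # as ||x||^2 + ||y||^2 - 2 x.y so no explicit pair loop is needed.
    sq = np.sum(features * features, axis=0)               # (C,) squared column norms
    d2 = sq[:, None] + sq[None, :] - 2.0 * features.T @ features
    d2 = np.maximum(d2, 0.0)                               # clamp round-off noise
    return np.exp(-d2 / (2.0 * sigma * sigma))             # (C, C) kernelized Gram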
Beispiel #9
0
def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        init = tf.random_normal(shape) * 256 / 1000
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        tv_loss = (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) +
                tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]))
        loss = ALPHA * content_loss + BETA * style_loss + TV_WEIGHT * tv_loss

        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(100000):
                print('i = %d' % i)
                if i % 10 == 0:
                    print('\tcontent_loss = %15.0f' % content_loss.eval())
                    print('\tstyle_loss   = %15.0f' % style_loss.eval())
                    print('\ttv_loss      = %15.0f' % tv_loss.eval())
                    print('\tloss         = %15.0f' % loss.eval())
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()
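
main() takes its four arguments straight from the command line; assuming the file is saved as stylize_simple.py (name hypothetical), an invocation would look like:

# python stylize_simple.py content.jpg style.jpg 512 1.0
#   content.jpg  path to the content image
#   style.jpg    path to the style image
#   512          output width in pixels (0 keeps the original size)
#   1.0          style image scale factor (0 keeps the original scale)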
Beispiel #10
0
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, use_IN, epochs=2, print_iterations=1000,
             batch_size=4, save_path='checkpoints/fast_style_transfer.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1

    # content_targets is a list of image files; trim it so its length is a
    # multiple of batch_size. With a single content image, mod is 0.
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly...")
        content_targets = content_targets[:-mod]

    # training images are resized to 256 x 256 by get_img, then enter the
    # tensorflow graph through the Adam optimizer's feed_dict.
    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1,) + style_target.shape # add 1 in the front for batch size, 4-D.
    print(f"batch_shape of the content image is: {batch_shape}")
    print(f"style_shape of the style image is: {style_shape}")

    ### Graph Construction ###
    # vgg is not trained here: vgg.py loads its fixed weights from the matlab file.
    # compute vgg style features as gram matrices
    config = v1.ConfigProto()
    config.gpu_options.allow_growth = True

    style_features = {}
    with tf.Graph().as_default(), v1.Session(config=config) as sess:
        style_image = v1.placeholder(tf.float32, shape=style_shape, name='style_image') # 4-D placeholder for feed_dict
        vgg_style_net = vgg.net(vgg_path, vgg.preprocess(style_image)) # extract feature volume
        np_style_target = np.array([style_target]) # a 3-D numpy array for feed_dict's input

        for layer in STYLE_LAYERS:
            # vgg_style_net[layer] is a tf.Tensor returned by tf.nn.relu,
            # eval at that layer, by running forward to that vgg layer or entire network.
            features = vgg_style_net[layer].eval(feed_dict={style_image:np_style_target}) # extract a fVol value
            features = np.reshape(features, (-1, features.shape[3])) # (N*H*W, C)
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    # computed vgg content feature map and both losses
    with tf.Graph().as_default(), v1.Session(config=config) as sess:
        X_content = v1.placeholder(tf.float32, shape=batch_shape, name="X_content") # 4-D
        vgg_content_net = vgg.net(vgg_path, vgg.preprocess(X_content)) # run ground truth image through the pre-trained model

        # the predicted image runs through the feed-forward conv net, then
        # through vgg to extract the predicted feature volumes
        if slow:
            preds = tf.Variable(
                tf.random.normal(X_content.get_shape()) * 0.256
            )
        else:
            preds = transform.net(X_content/255.0, use_IN) # feed-forward style network; pixels are scaled to [0, 1] first
        net = vgg.net(vgg_path, vgg.preprocess(preds)) # run the generated image through the pre-trained model

        # _tensor_size reduces over dimensions [1:], so it carries no
        # batch_size information.
        content_size = _tensor_size(vgg_content_net[CONTENT_LAYER]) * batch_size
        vgg_content_net_size = _tensor_size(vgg_content_net[CONTENT_LAYER])
        vgg_transform_content_net_size = _tensor_size(net[CONTENT_LAYER])
        # print(f"vgg_content_net_size is {vgg_content_net_size}")
        # print(vgg_content_net[CONTENT_LAYER])
        # print(f"vgg_transform_content_net_size is {vgg_transform_content_net_size}")
        # print(net[CONTENT_LAYER])
        assert vgg_content_net_size == vgg_transform_content_net_size

        # define loss functions
        # content loss
        content_l2_loss = 2 * tf.nn.l2_loss(net[CONTENT_LAYER] - vgg_content_net[CONTENT_LAYER])
        content_loss = content_weight * (content_l2_loss / content_size)

        # style loss
        style_l2_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            N, H, W, C = layer.get_shape()  # batch, height, width, channels
            feats = tf.reshape(layer, (N, H*W, C))        # N, HW, C
            feats_T = tf.transpose(feats, perm=[0, 2, 1]) # N, C, HW
            pred_gram = tf.matmul(feats_T, feats) / (H * W * C)
            true_gram = style_features[style_layer] # numpy array

            style_l2_loss = 2 * tf.nn.l2_loss(pred_gram - true_gram)
            style_l2_losses.append(style_l2_loss / true_gram.size)
        style_loss = style_weight * functools.reduce(tf.add, style_l2_losses) / batch_size

        # total variation denoising regularization loss
        # testing whether it is still needed with NN convs and mirror padding
        # tv_y_size = _tensor_size(preds[:,1:,:,:])
        # tv_x_size = _tensor_size(preds[:,:,1:,:])
        # # N, H, W, C
        # y_tv = 2 * tf.nn.l2_loss(preds[:, 1:, :, :] - preds[:, :batch_shape[1]-1, :, :]) # H, down - up
        # x_tv = 2 * tf.nn.l2_loss(preds[:, :, 1:, :] - preds[:, :, :batch_shape[2]-1, :]) # W, right - left
        # tv_loss = tv_weight * (x_tv/tv_x_size + y_tv/tv_y_size) / batch_size

        # total loss
        # total_loss = content_loss + style_loss + tv_loss
        total_loss = content_loss + style_loss

        # train the feed forward net, and save weights to a checkpoint.
        import random
        uid = random.randint(1, 100)
        print("This random UID is: %s" % uid)

        optimizer = v1.train.AdamOptimizer(learning_rate).minimize(total_loss)
        sess.run(v1.global_variables_initializer())
        for epoch in range(epochs): # epoch loop
            iterations = 0
            num_examples = len(content_targets) # COCO train2014 ~20000 images
            while iterations * batch_size < num_examples: # batch loop
                # start training a batch
                start_time = time.time()

                X_batch = np.zeros(batch_shape, dtype=np.float32)
                start = iterations * batch_size
                end = iterations * batch_size + batch_size
                for i, img_p in enumerate(content_targets[start:end]): # img_p is a coco images
                   X_batch[i] = get_img(img_p, (256,256,3)).astype(np.float32) # resize to 256 x 256

                optimizer.run(feed_dict={X_content:X_batch})

                end_time = time.time()
                # end training a batch

                # update training information
                iterations += 1
                is_print_iter = int(iterations) % print_iterations == 0
                is_last_train = epoch == epochs - 1 and iterations * batch_size >= num_examples

                if slow:
                    is_print_iter = epoch % print_iterations == 0
                if debug:
                    print("UID: %s, batch training time: %s" % (uid, end_time - start_time))
                # monitor the training losses
                if is_print_iter or is_last_train:
                    _style_loss, _content_loss, _total_loss, _preds = \
                        sess.run([style_loss, content_loss, total_loss, preds],
                                  feed_dict={X_content:X_batch})
                    losses = (_style_loss, _content_loss, _total_loss)
                    generated_image = _preds

                    if slow:
                       generated_image = vgg.unprocess(generated_image)
                    else:
                       res = v1.train.Saver().save(sess, save_path)
                    print("yield")
                    yield(generated_image, losses, iterations, epoch)
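
A minimal driver for the generator above; the file list, style array, and hyperparameter values below are placeholders for illustration, not values from the original code:

import numpy as np

content_files = ['coco/img1.jpg', 'coco/img2.jpg']      # hypothetical paths
style_img = np.zeros((256, 256, 3), dtype=np.float32)   # stand-in for a loaded style image

for generated_image, losses, iteration, epoch in optimize(
        content_files, style_img,
        content_weight=7.5, style_weight=100.0, tv_weight=200.0,
        vgg_path='data/imagenet-vgg-verydeep-19.mat',   # hypothetical path
        use_IN=True):
    _style_loss, _content_loss, _total_loss = losses
    print('epoch %d, iteration %d, total loss %g' % (epoch, iteration, _total_loss))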
Beispiel #11
0
def stylize(network,
            content,
            style,
            initial,
            initial_noiseblend,
            content_weight=5e0,
            content_layer_num=9,
            style_weight=5e2,
            style_layer_weight=(0.2, 0.2, 0.2, 0.2, 0.2),
            tv_weight=1e2,
            learning_rate=1e1,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-8,
            preserve_colors=False,
            pooling='max',
            iterations=1000,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape

    content_features = {}
    style_features = {}
    style_layers_weights = {}
    content_layer = CONTENT_LAYERS[content_layer_num]

    for i, style_layer in enumerate(STYLE_LAYERS):
        style_layers_weights[style_layer] = style_layer_weight[i]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)
    image = tf.placeholder(tf.float32, shape=shape)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
    style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])

    # compute content features,style features in feedforward mode
    with tf.Session() as sess:
        content_features[content_layer] = sess.run(
            net[content_layer], feed_dict={image: content_pre})

        for layer in STYLE_LAYERS:
            features = sess.run(net[layer], feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    # make stylized image using backpropagation
    if initial is None:
        noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = tf.random_normal(shape) * 0.256
    else:
        initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
        initial = initial.astype(np.float32)
        noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = initial * (1 - initial_noiseblend) + (
            tf.random_normal(shape) * 0.256) * initial_noiseblend
    image = tf.Variable(initial)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    # content loss
    content_loss = content_weight * 2 * tf.nn.l2_loss(
        net[content_layer] -
        content_features[content_layer]) / content_features[content_layer].size

    # style loss
    style_loss = 0
    for style_layer in STYLE_LAYERS:
        layer = net[style_layer]
        _, height, width, number = map(lambda i: i.value, layer.get_shape())
        size = height * width * number
        feats = tf.reshape(layer, (-1, number))
        gram = tf.matmul(tf.transpose(feats), feats) / size
        style_gram = style_features[style_layer]
        style_loss += style_weight * style_layers_weights[
            style_layer] * 2 * tf.nn.l2_loss(gram -
                                             style_gram) / style_gram.size

    # total variation denoising
    tv_y_size = _tensor_size(image[:, 1:, :, :])
    tv_x_size = _tensor_size(image[:, :, 1:, :])
    tv_loss = tv_weight * 2 * (
        (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) /
         tv_y_size) +
        (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]) /
         tv_x_size))

    # overall loss
    loss = content_loss + style_loss + tv_loss

    # optimizer setup
    train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                        epsilon).minimize(loss)

    def print_progress():
        print('  content loss: %g\n' % content_loss.eval())
        print('    style loss: %g\n' % style_loss.eval())
        print('       tv loss: %g\n' % tv_loss.eval())
        print('    total loss: %g\n' % loss.eval())

    # optimization
    best_loss = float('inf')
    best = None
    images = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('Optimization started...\n')
        if (print_iterations and print_iterations != 0):
            print_progress()
        for i in range(iterations):
            train_step.run()

            last_step = (i == iterations - 1)
            if last_step or (print_iterations and i % print_iterations == 0):
                print('Iteration %4d/%4d\n' % (i + 1, iterations))
                print_progress()

            if (checkpoint_iterations
                    and i % checkpoint_iterations == 0) or last_step:
                this_loss = loss.eval()

                styled_image = np.clip(
                    vgg.unprocess(image.eval().reshape(shape[1:]),
                                  vgg_mean_pixel), 0, 255)

                if this_loss < best_loss:
                    best_loss = this_loss
                    best = styled_image

                if preserve_colors:
                    original_image = np.clip(content, 0, 255)

                    # Luminosity transfer steps:
                    # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                    # 2. Convert stylized grayscale into YUV (YCbCr)
                    # 3. Convert original image into YUV (YCbCr)
                    # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                    # 5. Convert recombined image from YUV back to RGB

                    # 1
                    styled_grayscale = rgb2gray(styled_image)
                    styled_grayscale_rgb = gray2rgb(styled_grayscale)

                    # 2
                    styled_grayscale_yuv = np.array(
                        Image.fromarray(styled_grayscale_rgb.astype(
                            np.uint8)).convert('YCbCr'))

                    # 3
                    original_yuv = np.array(
                        Image.fromarray(original_image.astype(
                            np.uint8)).convert('YCbCr'))

                    # 4
                    w, h, _ = original_image.shape
                    combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                    combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                    combined_yuv[..., 1] = original_yuv[..., 1]
                    combined_yuv[..., 2] = original_yuv[..., 2]

                    # 5
                    styled_image = np.array(
                        Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

                plt.figure(figsize=(8, 8))
                plt.imshow(styled_image.astype(np.uint8))
                plt.axis('off')
                plt.show()

                images.append(styled_image.astype(np.uint8))

    return images, best
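
_tensor_size is used throughout these examples but never shown in this excerpt; a common sketch, matching how it is applied to 4-D image tensors (product of all non-batch dimensions), is:

from functools import reduce
from operator import mul

def _tensor_size(tensor):
    # number of elements per example, i.e. all dimensions except the batch one
    return reduce(mul, (d.value for d in tensor.get_shape()[1:]), 1)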
Beispiel #12
0
def inferenceImg(network, initial_img, initial_noiseblend, content, style,
                 preserve_colors, iterations, content_weight,
                 content_weight_blend, style_weight, style_layer_weight_exp,
                 style_blend_weight, tv_weight, learning_rate, beta1, beta2,
                 epsilon, pooling, print_iterations, checkpoint_iterations):

    content_shape = (1, ) + content.shape
    style_shape = (1, ) + style.shape

    content_features = {}
    style_features = {}

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight = layer_weight * style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum = layer_weights_sum + style_layers_weights[
            style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = style_layers_weights[
            style_layer] / layer_weights_sum

    # compute content features in feedforward mode
    g1 = tf.Graph()
    with g1.as_default(), g1.device('/cpu:0'), tf.Session() as sess:
        contentImg = tf.placeholder('float', shape=content_shape)
        net = vgg.net_preloaded(vgg_weights, contentImg, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={contentImg: content_pre})

    # compute style features in feedforward mode
    g2 = tf.Graph()
    with g2.as_default(), g2.device('/cpu:0'), tf.Session() as sess:
        styleImg = tf.placeholder('float', shape=style_shape)
        net = vgg.net_preloaded(vgg_weights, styleImg, pooling)
        style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={styleImg: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        noise = np.random.normal(size=content_shape,
                                 scale=np.std(content) * 0.1)
        initial = tf.random_normal(content_shape) * 0.256
        inferenceImg = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, inferenceImg, pooling)

        # compute content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # compute style loss
        style_loss = 0
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            _, height, width, number = map(lambda i: i.value,
                                           layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(style_layers_weights[style_layer] * 2 *
                                tf.nn.l2_loss(gram - style_gram) /
                                style_gram.size)
        style_loss += style_weight * style_blend_weight * reduce(
            tf.add, style_losses)

        # skip compute variation denoise, in order to shorten the running time
        # total variation denoising
        # tv_y_size = _tensor_size(inferenceImg[:, 1:, :, :])
        # tv_x_size = _tensor_size(inferenceImg[:, :, 1:, :])
        # tv_loss = tv_weight * 2 * (
        #         (tf.nn.l2_loss(inferenceImg[:, 1:, :, :] - inferenceImg[:, :content_shape[1] - 1, :, :]) /
        #          tv_y_size) +
        #         (tf.nn.l2_loss(inferenceImg[:, :, 1:, :] - inferenceImg[:, :, :content_shape[2] - 1, :]) /
        #          tv_x_size))

        tv_loss = 0
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer training
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = inferenceImg.eval()

                    img_out = vgg.unprocess(best.reshape(content_shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
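
As a quick sanity check on the Gram computation shared by all of these examples: reshaping a feature volume to (H*W, C) and multiplying gives a (C, C) matrix whose size is independent of the spatial resolution.

import numpy as np

features = np.random.rand(1, 316, 400, 64).astype(np.float32)  # e.g. relu1_1 activations
flat = np.reshape(features, (-1, features.shape[3]))           # (126400, 64)
gram = np.matmul(flat.T, flat) / flat.size                     # (64, 64)
print(flat.shape, gram.shape)
assert np.allclose(gram, gram.T)  # a Gram matrix is symmetric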
Beispiel #13
0
    def stylize(self, network, content, styles, iterations, content_weight,
                content_weight_blend, style_weight, style_layer_weight_exp,
                style_blend_weights, tv_weight, learning_rate, beta1, beta2,
                epsilon, pooling):
        """
        Nałożenie stylu na obraz
        Metoda jest wywoływana iteracyjnie, obliczane są straty i wagi, a potem do rodzica jest przekazywany
        tuple z iteratorem i tablicą obrazu oraz, jeśli to ostatnia iteracja, z obliczonymi stratami

        :rtype: iterator[tuple[int,image]]
        """
        self.style_features = [{} for _ in styles]
        self.content_features = {}
        self.style_shapes = [(1, ) + style.shape for style in styles]
        self.shape = (1, ) + content.shape
        self.vgg_weights, vgg_mean_pixel = vgg.load_net(network)
        self.layer_weight = 1.0
        for style_layer in self.style_layers:
            self.style_layers_weights[style_layer] = self.layer_weight
            self.layer_weight *= style_layer_weight_exp

        self.calculate_sum_weight()
        self.calculate_content_feature(pooling, content, vgg_mean_pixel)
        self.calculate_style_feature(styles, pooling, vgg_mean_pixel)

        # make stylized image using backpropagation
        with tf.Graph().as_default():
            initial = tf.random_normal(self.shape) * 0.256
            self.image = tf.Variable(initial)
            self.net = vgg.net_preloaded(self.vgg_weights, self.image, pooling)

            self.calculate_content_loss(content_weight_blend, content_weight)
            self.calculate_style_loss(styles, style_weight,
                                      style_blend_weights)
            self.denoise_image(tv_weight)
            self.calculate_total_loss()

            # optimizer setup
            train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                                epsilon).minimize(self.loss)

            # optimization
            best_loss = float('inf')
            best = None
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                for i in range(iterations):
                    print('%4d/%4d' % (i + 1, iterations))
                    train_step.run()

                    last_step = (i == iterations - 1)
                    if last_step:
                        loss_vals = self.get_loss_vals(self.loss_store)
                    else:
                        loss_vals = None

                    if last_step:
                        this_loss = self.loss.eval()
                        if this_loss < best_loss:
                            best_loss = this_loss
                            best = self.image.eval()

                        img_out = vgg.unprocess(best.reshape(self.shape[1:]),
                                                vgg_mean_pixel)
                    else:
                        img_out = None

                    yield i + 1 if last_step else i, img_out, loss_vals
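
The exponential layer weighting used by stylize above is easy to check in isolation; with style_layer_weight_exp = 1.0 every layer ends up with the uniform weight 0.2, while values above 1.0 shift weight toward deeper layers:

STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')

def layer_weights(style_layer_weight_exp):
    # geometric progression across the style layers, normalized to sum to 1
    w, weights = 1.0, {}
    for layer in STYLE_LAYERS:
        weights[layer] = w
        w *= style_layer_weight_exp
    total = sum(weights.values())
    return {layer: value / total for layer, value in weights.items()}

print(layer_weights(1.0))  # every layer: 0.2
print(layer_weights(2.0))  # deeper layers weighted more heavily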
Beispiel #14
0
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:', features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                print('Final features shape:', features.shape)
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape:', gram.shape)
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print('Height, width, number:', height, width, number)
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                
                print('Style features shape:', style_features[i][style_layer].shape)
                print(style_layer)
                
                if style_layer == 'relu2_1':
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    temp = imread('emma/emma_test_mask.jpg').astype(np.float32)
                    # 2x2 max-pool the mask down to this layer's resolution
                    c = temp.reshape(height, 2, width, 2)
                    temp = c.max(axis=1).max(axis=2)
                    print(temp.shape)
                    maskt = np.reshape(temp, (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:, d] = maskt
                    print('Mask shape:', mask.shape)
                else:
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('emma/emma_test_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:, d] = maskt
                    print('Mask shape:', mask.shape)
                if i == 0:
                    mask = tf.constant(mask)
                    print('Mask shape:', [dim.value for dim in mask.get_shape()])
                    feats = tf.multiply(feats, mask)

                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                else:
                    # complement of the mask, cast to float so it can multiply the features
                    mask2 = (mask < 1).astype(np.float32)
                    feats2 = tf.multiply(feats, mask2)
                    gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if (i is not None and i % print_iterations == 0) or last:
                    stderr.write('  content loss: %g\n' % content_loss.eval())
                    stderr.write('    style loss: %g\n' % style_loss.eval())
                    stderr.write('       tv loss: %g\n' % tv_loss.eval())
                    stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)

                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)

            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
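
The reshape/max trick used above to shrink the relu2_1 mask is just a 2x2 max-pool; in isolation:

import numpy as np

img = np.arange(16, dtype=np.float32).reshape(4, 4)  # stand-in for the mask image
h, w = 2, 2                                          # target (halved) resolution
pooled = img.reshape(h, 2, w, 2).max(axis=1).max(axis=2)
print(pooled)  # each output pixel is the max over one 2x2 block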
Beispiel #15
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image);
    `iteration` is None if this is the final image (the last iteration).
    Otherwise tuples are yielded every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """

    # The shape information in the comment is based on the content image 1-content.jpg with shape (533, 400, 3)
    # and 1-style.jpg (316, 400, 3)
    # This should be changed with different images.

    shape = (1,) + content.shape                                # (1, 533, 400, 3)
    style_shapes = [(1,) + style.shape for style in styles]     # (1, 316, 400, 3)
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)         # Load the VGG-19 model.
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight        # {'relu1_1': 1.0, 'relu2_1': 1.0, 'relu3_1': 1.0, 'relu4_1': 1.0, 'relu5_1': 1.0}
        layer_weight *= style_layer_weight_exp                  # 1.0

    # VGG19 layers:
    # 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    # 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    # 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    # 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
    # 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4'

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:                            # ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
        layer_weights_sum += style_layers_weights[style_layer]  # 5.0
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum  # {'relu1_1': 0.2, 'relu2_1': 0.2, 'relu3_1': 0.2, 'relu4_1': 0.2, 'relu5_1': 0.2}

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)              # {'conv1_1': Tensor..., relu1_1: Tensor...}
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)]) # (1, 533, 400, 3) subtract with the mean pixel
        for layer in CONTENT_LAYERS:                                                  # (relu4_2, relu5_2)
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre}) # Find the feature values for (relu4_2, relu5_2)

    # compute style features in feed forward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])            # (1, 316, 400, 3)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:                                        # ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
                features = net[layer].eval(feed_dict={image: style_pre})      # For relu1_1 layer (1, 316, 400, 64)
                features = np.reshape(features, (-1, features.shape[3]))      # (126400, 64)
                gram = np.matmul(features.T, features) / features.size        # (64, 64) Gram matrix - measure the dependency of features.
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend                    # 0

    # make stylized image using backpropogation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1) # Generate a random image with SD the same as the content image.
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:       # ('relu4_2', 'relu5_2')
            # Use MSE as the content loss
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]            # For relu1_1: (1, 533, 400, 64)
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))                # (213200, 64)
                gram = tf.matmul(tf.transpose(feats), feats) / size    # Gram matrix for the features in relu1_1 for the result image.
                style_gram = style_features[i][style_layer]            # Gram matrix for the style
                # Style loss is the MSE between the two Gram matrices
                style_losses.append(style_layers_weights[style_layer]
                                    * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # Total variation denoising: add a cost that penalizes large differences
        # between neighboring pixels. This helps reduce noise.
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
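
# The luminosity-transfer steps above can be exercised on their own. A minimal
# sketch, assuming uint8 RGB numpy arrays of equal shape; rgb2gray/gray2rgb are
# stand-ins for the helpers this example relies on.
import numpy as np
from PIL import Image

def rgb2gray(rgb):
    # Rec.601 luma weights, as in step 1 above
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

def gray2rgb(gray):
    return np.stack([gray] * 3, axis=-1)

def transfer_luminosity(stylized, original):
    styled_yuv = np.array(Image.fromarray(
        gray2rgb(rgb2gray(stylized)).astype(np.uint8)).convert('YCbCr'))
    original_yuv = np.array(Image.fromarray(
        original.astype(np.uint8)).convert('YCbCr'))
    combined = np.empty_like(original_yuv)
    combined[..., 0] = styled_yuv[..., 0]      # luma from the stylized image
    combined[..., 1:] = original_yuv[..., 1:]  # chroma from the original
    return np.array(Image.fromarray(combined, 'YCbCr').convert('RGB'))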
Example #16
def synthesis(network, initial, initial_noiseblend, content, styles,
              iterations, content_weight, content_weight_blend, style_weight,
              style_layer_weight_exp, style_blend_weights, tv_weight,
              learning_rate):
    """
    
    :input
    :-styles: a list containing one or multiple images used as style image.(art work) 
    """
    # calculate the original image (content) shape
    image_shape = (1, ) + content.shape
    # calculate the art image (style) shape
    style_shapes = [(1, ) + style.shape for style in styles]
    # style layer weights increase exponentially - weight(layer<n+1>) = weight_exp*weight(layer<n>)
    style_layers_weights = style_layer_weight_cal(style_layer_weight_exp)

    content_features, style_features, mean_pixel = compute_feature(
        network, image_shape, style_shapes, content, styles)

    initial_content_coeff = 1.0 - initial_noiseblend

    with tf.Graph().as_default():
        # overall loss
        image, content_loss, style_loss, tv_loss, loss = loss_computaion(
            network, initial, image_shape, mean_pixel, initial_content_coeff,
            initial_noiseblend, content, content_weight_blend, content_weight,
            content_features, styles, style_layers_weights, style_features,
            tv_weight, style_weight, style_blend_weights)

        # optimizer setup
        # The original paper did not specify an optimization method, so we use the classical Adam optimizer here
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        # optimization
        # optimization_process(train_step, image, content_loss, style_loss, tv_loss, loss, vgg_mean_pixel, preserve_colors, content)
        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')

            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step:
                    print_progress()
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    # final step, generate output image
                    img_out = vgg.unprocess(best.reshape(image_shape[1:]),
                                            mean_pixel)

                    yield ((None if last_step else i), img_out)
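
# style_layer_weight_cal is referenced above but not shown. A hypothetical
# sketch that mirrors the exponential-then-normalize scheme used by the
# stylize() variants in this document; the STYLE_LAYERS tuple is assumed.
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')

def style_layer_weight_cal(style_layer_weight_exp):
    # weight(layer<n+1>) = style_layer_weight_exp * weight(layer<n>)
    weights, w = {}, 1.0
    for layer in STYLE_LAYERS:
        weights[layer] = w
        w *= style_layer_weight_exp
    total = sum(weights.values())
    return {layer: v / total for layer, v in weights.items()}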
def optimize(content_targets,
             style_target,
             content_weight,
             style_weight,
             tv_weight,
             vgg_path,
             epochs=2,
             print_iterations=1000,
             batch_size=4,
             save_path='saver/fns.ckpt',
             slow=False,
             learning_rate=1e-3,
             device='/cpu:0',
             debug=False,
             total_iterations=-1,
             base_model_path=None):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}

    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1, ) + style_target.shape
    print(style_shape)

    # precompute style features
    print("Precomputing style features")
    sys.stdout.flush()
    with tf.Graph().as_default(), tf.device(device), tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name="X_content")
        X_pre = vgg.preprocess(X_content)

        print("Precomputing content features")
        sys.stdout.flush()

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256)
            preds_pre = preds
        else:
            preds = transform.net(X_content / 255.0)
            preds_pre = vgg.preprocess(preds)

        print("Building VGG net")
        sys.stdout.flush()
        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            grams = tf.matmul(feats_T, feats) / size  # tf.batch_matmul was removed in TF 1.0; tf.matmul batches automatically
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)

        style_loss = style_weight * reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated

        # If base model file is present, load that in to the session
        if base_model_path:
            saver = tf.train.Saver()
            if os.path.isdir(base_model_path):
                ckpt = tf.train.get_checkpoint_state(base_model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    raise Exception("No checkpoint found...")
            else:
                saver.restore(sess, base_model_path)

        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        sys.stdout.flush()
        for epoch in range(epochs):
            num_examples = len(content_targets)
            print("number of examples: %s" % num_examples)
            sys.stdout.flush()
            iterations = 0
            while iterations * batch_size < num_examples:
                print("Current iteration : %s" % iterations)
                sys.stdout.flush()

                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p,
                                         (256, 256, 3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = False
                if epoch == epochs - 1 and iterations * batch_size >= num_examples:
                    is_last = True
                if total_iterations > 0 and iterations >= total_iterations:
                    is_last = True
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
                if is_last:
                    break
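
# The per-layer style features above are plain Gram matrices. A NumPy sketch
# of that single step, assuming an activation array of shape (1, H, W, C).
import numpy as np

def gram_matrix(features):
    c = features.shape[3]
    flat = features.reshape(-1, c)                  # (H*W, C)
    return np.matmul(flat.T, flat) / features.size  # (C, C)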
Example #18
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False, device_and_number='/cpu:0'):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod] 

    style_features = {}

    batch_shape = (batch_size,256,256,3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # tf.device('/gpu:0') was originally removed to let the system pick a device automatically; that is no longer the case - the device is chosen explicitly below
    device_type, device_number = device_and_number.strip('/').split(':')
    if device_type == 'gpu': # /gpu:0 means use GPU 0; /gpu:1 means use GPU 1; /gpu:2 means use GPU 2; etc.
        os.environ["CUDA_VISIBLE_DEVICES"] = device_number # starts at 0
        session_conf = tf.ConfigProto() # session_conf.gpu_options.allow_growth = True # test if growth slows down training
        # backprop doubles RAM usage
        # for training, takes 2.7 seconds/iter for batch size=20
        # for evaluating loss and saving checkpoint, takes 2.6 seconds (depending on size of test image) using 1000x700 px image 
    else: # /cpu:0 means use all CPUs; /cpu:1 means use 1 CPU; /cpu:2 means use 2 CPUs; etc.
        session_conf = tf.ConfigProto(intra_op_parallelism_threads=int(device_number))
    with tf.Graph().as_default(), tf.Session(config=session_conf) as sess: # precompute style features
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image:style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session(config=session_conf) as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content/255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER])*batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i:i.value,layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0,2,1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        checkpoint_number = 0
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256,256,3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {
                   X_content:X_batch
                }
                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                       X_content:X_batch
                    }

                    tup = sess.run(to_get, feed_dict = test_feed_dict)
                    _style_loss,_content_loss,_tv_loss,_loss,_preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        parent_dir = os.path.dirname(save_path)
                        checkpoint_number += 1
                        actual_dir = os.path.join(parent_dir, "checkpoint_{}".format(checkpoint_number))
                        if not os.path.exists(actual_dir):
                            os.mkdir(actual_dir)
                        filename = os.path.basename(save_path)
                        actual_path = os.path.join(actual_dir, filename)  # hard-coded checkpoint directory naming
                        res = saver.save(sess, actual_path)
                        if os.path.isfile(actual_path + '.meta'): # delete fns.ckpt.meta file, which takes 160 MBs
                            os.remove(actual_path + '.meta')
                    yield _preds, losses, iterations, epoch, checkpoint_number
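
# The device handling above assumes a well-formed spec such as '/gpu:0'.
# A hedged sketch of that parsing with a CPU fallback; parse_device is a
# hypothetical helper, not part of the example.
def parse_device(spec):
    if not spec:
        spec = '/cpu:0'
    device_type, device_number = spec.strip('/').split(':')
    return device_type, device_number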
Example #19
def stylize(network,
            initial,
            content,
            style,
            iterations,
            content_weight,
            style_weight,
            tv_weight,
            learning_rate,
            print_iter=None):
    shape = (1, ) + content.shape
    style_shape = (1, ) + style.shape
    content_features = {}
    style_features = {}

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(network, image)
        style_pre = np.array([vgg.preprocess(style, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 256 / 1000
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        content_loss = tf.nn.l2_loss(net[CONTENT_LAYER] -
                                     content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value,
                                           layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        tv_loss = (
            tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) +
            tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]))
        loss = content_weight * content_loss + \
            style_weight * style_loss + tv_weight * tv_loss

        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                if print_iter is not None and i % print_iter == 0:
                    print('  content loss: %g' % content_loss.eval())
                    print('    style loss: %g' % style_loss.eval())
                    print('       tv loss: %g' % tv_loss.eval())
                    print('    total loss: %g' % loss.eval())
                print('Iteration %d/%d' % (i + 1, iterations))
                train_step.run()
            return vgg.unprocess(image.eval().reshape(shape[1:]), mean_pixel)
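
# vgg.preprocess / vgg.unprocess above are, per the comments elsewhere in this
# document, mean-pixel shifts. A minimal sketch under that assumption.
def preprocess(image, mean_pixel):
    return image - mean_pixel

def unprocess(image, mean_pixel):
    return image + mean_pixel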
Example #20
def optimize(content_targets,style_target,content_weight,style_weight,
             tv_weight,vgg_path,epochs=2,print_iterations=1000,batch_size = 4,
             save_path='checkpoint/save/model.ckpt',slow=False,learning_rate = 1e-3,debug=False):
    
    if slow:
        batch_size=1
        
    # trimming the total training set size
    extra_train_img = len(content_targets)%batch_size
    
    if extra_train_img>0:
        print("Leaving out {} extra (modulus) train examples ".format(extra_train_img))
        content_targets = content_targets[:-extra_train_img]
    
    train_batch_shape = (batch_size,256,256,3)
    
    style_img_gram_features = {}
    
    # appending the batch size for the style image
    style_img_shape = (1,)+style_target.shape
    
    
    # precomputing the style image gram matrix features
    with tf.Graph().as_default(),tf.device('/cpu:0'),tf.Session() as sess:
        
        # defining style image placeholder and preprocessing the image
        style_image_ph = tf.placeholder(tf.float32,shape=style_img_shape,name='style_image_ph')
        style_image_pre = vgg.preprocess(style_image_ph)
        
        # passing the "preproccessed style image" through the VGG19 network
        style_net = vgg.net(vgg_path,style_image_pre)
        
        # creating the numpy array of the style image
        style_img_feed = np.array([style_target])
        
        for layer in STYLE_LAYERS:
            # activations for the style image's different VGG19 layers
            activations = style_net[layer].eval(feed_dict = {style_image_ph:style_img_feed})
            
            activations = np.reshape(activations,(-1,activations.shape[3]))
            gram = np.matmul(activations.T,activations)/activations.size
            
            style_img_gram_features[layer] = gram
            
    # defining graph for computing the Content cost, Style cost and TV cost
    with tf.Graph().as_default(),tf.Session() as sess:
        
        X_content_ph = tf.placeholder(tf.float32,shape =train_batch_shape,name='X_content_ph' )
        X_pre = vgg.preprocess(X_content_ph)
        
        # precomputing the content image activation for content loss
        content_img_activation = {}
        
        content_net = vgg.net(vgg_path,X_pre)
        content_img_activation[CONTENT_LAYER] = content_net[CONTENT_LAYER]
        
        if slow :
            preds = tf.Variable(tf.random_normal(X_content_ph.get_shape())*0.256)
            preds_pre = preds
        
        else:
            # getting the generated image by transforming the content image
            
            preds = transform_net.net(X_content_ph/255.0)
            preds_pre = vgg.preprocess(preds)
         
        # passing the "preproccessed generated image" through the VGG19 network    
        gen_net = vgg.net(vgg_path,preds_pre)
        
        assert _tensor_size(content_img_activation[CONTENT_LAYER]) == _tensor_size(gen_net[CONTENT_LAYER])
                    
        # calculating the content loss
        content_img_size = _tensor_size(content_img_activation[CONTENT_LAYER])*batch_size
        
        content_loss = content_weight * ( 2 * 
                       tf.nn.l2_loss(gen_net[CONTENT_LAYER]-content_img_activation[CONTENT_LAYER])/content_img_size
                       )            
        
        # computing the generated image gram matrix features
        style_loss = []
        
        for style_layer in STYLE_LAYERS:
            
            activations = gen_net[style_layer]
            
            bs,height,width,filters = map(lambda i:i.value,activations.get_shape())
            activation_size = height*width*filters
            
            activations = tf.reshape(activations,(bs,height*width,filters))
            activations_T = tf.transpose(activations,perm=[0,2,1])
            
            gram = tf.matmul(activations_T,activations)/activation_size
            
            style_gram = style_img_gram_features[style_layer]
            
            style_loss.append(2 * tf.nn.l2_loss(gram-style_gram)/style_gram.size)
        
        style_loss = style_weight * functools.reduce(tf.add,style_loss)/batch_size
        
        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:train_batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:train_batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size
        
        # Defining total loss
        loss = content_loss + style_loss + tv_loss
        
        # Defining optimizer
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        
        # variable initialization (TensorFlow)
        sess.run(tf.global_variables_initializer())
        
        for epoch in range(epochs):
            
            num_examples = len(content_targets)
            
            print('number of training examples:', num_examples)
            
            iterations = 0
            while iterations*batch_size < num_examples:
                start_time = dt.datetime.now()
                
                start_batch = iterations*batch_size
                end_batch = start_batch + batch_size
                
                iterations+=1
                
                X_input_feed = np.zeros(train_batch_shape,dtype = np.float32)
                
                # preparing the X_input_feed
                for j,input_img in enumerate(content_targets[start_batch:end_batch]):
                    X_input_feed[j] = get_img(input_img,(256,256,3)).astype(np.float32)
                    
                assert X_input_feed.shape[0] == batch_size
                
                # running optimizer step
                feed_dict = { X_content_ph:X_input_feed}
                train_step.run(feed_dict=feed_dict)
                
                end_time = dt.datetime.now()
                
                if debug :
                    print("iteration : {} , epoch : {} , time for this iteration : {}".format(iterations,epoch,str(end_time-start_time)))
                    
                # print every print_iterations iterations (i.e. only when
                # num_examples exceeds print_iterations * batch_size)
                print_iter = int(iterations)%print_iterations == 0
                
                if slow:
                    print_iter = epoch % print_iterations == 0
                    
                is_last_iter = epoch == epochs-1 and iterations * batch_size >=num_examples
                
                should_print = print_iter or is_last_iter
                
                if should_print:
                    calculate_these  = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content_ph:X_input_feed}
                    
                    # calculating the losses and preds, i.e. the generated image
                    _style_loss,_content_loss,_tv_loss,_loss,_preds = sess.run(calculate_these,
                                                                               feed_dict=test_feed_dict)
                    
                    losses = (_style_loss,_content_loss,_tv_loss,_loss)
                    
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess,save_path)
                        
                    yield (_preds,losses,iterations,epoch)
Example #21
def do_shit(content,
            style,
            iterations=1000,
            learning_rate=1e0,
            content_weight=5,
            style_weight=1e2,
            smooth_weight=1e2):

    content = imresize(content, [256, 256])
    style = imresize(style, [256, 256])

    shape = (1, ) + content.shape
    content_features = {}

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(NETWORK, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    style_features = {}
    style_shape = (1, ) + style.shape
    # compute style features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, mean_pixel = vgg.net(NETWORK, image)
        style_pre = np.array([vgg.preprocess(style, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    # optimizing the image
    with tf.Graph().as_default():
        image = tf.Variable(tf.random_normal(shape) * 0.256)
        net, channel_avg = vgg.net(NETWORK, image)

        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                                         content_features[CONTENT_LAYER].size)

        style_loss = 0
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            _, height, width, number = map(lambda i: i.value,
                                           layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) /
                                style_gram.size)
        style_loss = style_weight * reduce(tf.add, style_losses)

        y_size = _tensor_size(image[:, 1:, :, :])
        x_size = _tensor_size(image[:, :, 1:, :])
        smooth_loss = smooth_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / x_size))

        loss = content_loss + style_loss + smooth_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        im_out = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                print('Iteration', i)
                train_step.run()
            im = image.eval()
    return vgg.unprocess(im.reshape(shape[1:]), channel_avg)
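
# smooth_loss above is the same total-variation term the other examples call
# tv_loss. A NumPy sketch of it; note tf.nn.l2_loss(t) is sum(t**2) / 2.
import numpy as np

def tv_loss(img, tv_weight):
    # img: array of shape (1, H, W, C)
    y_diff = img[:, 1:, :, :] - img[:, :-1, :, :]
    x_diff = img[:, :, 1:, :] - img[:, :, :-1, :]
    y_term = np.sum(y_diff ** 2) / 2 / y_diff.size
    x_term = np.sum(x_diff ** 2) / 2 / x_diff.size
    return tv_weight * 2 * (y_term + x_term)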
Example #22
def optimize(content_targets,
             style_target,
             content_weight,
             style_weight,
             tv_weight,
             vgg_path,
             epochs=2,
             print_iterations=1000,
             batch_size=4,
             save_path='saver/fns.ckpt',
             slow=False,
             learning_rate=1e-3,
             debug=False):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}

    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1, ) + style_target.shape
    print(style_shape)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.333

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session(
            config=config) as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session(config=config) as sess:
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        # TensorBoard output
        train_writer = tf.summary.FileWriter('./logs/1/train', sess.graph)

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256)
            preds_pre = preds
        else:
            preds = transform.net(X_content / 255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add,
                                                     style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        loss = content_loss + style_loss + tv_loss

        # tensorboard variables
        # batch_time = tf.Variable(0)
        # tf.summary.scalar("style_loss", style_loss)
        # tf.summary.scalar("content_loss", content_loss)
        # tf.summary.scalar("tv_loss", tv_loss)
        # tf.summary.scalar("loss", loss)
        # tf.summary.scalar("batch_time", batch_time)

        # overall loss
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p,
                                         (256, 256, 3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}

                sess.run(optimizer, feed_dict=feed_dict)
                end_time = time.time()
                batch_time = end_time - start_time
                # batch_time.load(end_time - start_time, sess)
                # merge = tf.summary.merge_all()
                # summary = sess.run(merge, feed_dict=feed_dict)
                # train_writer.add_summary(summary=summary, global_step=iterations)

                if iterations % 200 == 0:
                    print("batch time: " + str(batch_time) + " iteration: " +
                          str(iterations))

                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
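
# The commented-out summary code above sketches TensorBoard logging. One way
# to wire it up under the TF 1.x summary API; the helper name is an assumption.
import tensorflow as tf

def add_loss_summaries(style_loss, content_loss, tv_loss, loss):
    # Register the scalars and merge them; run the merged op each step and
    # pass the result to tf.summary.FileWriter.add_summary, as hinted above.
    tf.summary.scalar("style_loss", style_loss)
    tf.summary.scalar("content_loss", content_loss)
    tf.summary.scalar("tv_loss", tv_loss)
    tf.summary.scalar("loss", loss)
    return tf.summary.merge_all()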
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod] 

    style_features = {}

    batch_shape = (batch_size,256,256,3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image:style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content/255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER])*batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i:i.value,layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0,2,1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256,256,3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {
                   X_content:X_batch
                }

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                       X_content:X_batch
                    }

                    tup = sess.run(to_get, feed_dict = test_feed_dict)
                    _style_loss,_content_loss,_tv_loss,_loss,_preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
def stylize_c(network,
              initial,
              initial_noiseblend,
              content,
              styles,
              preserve_colors,
              iterations,
              content_weight,
              content_weight_blend,
              style_weight,
              style_layer_weight_exp,
              style_blend_weights,
              tv_weight,
              learning_rate,
              beta1,
              beta2,
              epsilon,
              pooling,
              prev_style_image,
              prev_content_image,
              print_iterations=None,
              checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            #noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            noise = content
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        #content_layers_weights['conv2_2'] = content_weight_blend
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # Continuity loss (the Gaussian-smoothing variants below are left commented out)
        #K = np.array([[1/256,4/256,6/256,4/256,1/256],
        #[4/256,16/256,24/256,16/256,4/256],
        #[6/256,24/256,36/256,24/256,6/256],
        #[4/256,16/256,24/256,16/256,4/256],
        #[1/256,4/256,6/256,4/256,1/256]], dtype=np.float32)
        #G_filt = np.zeros([5,5,3],dtype=np.float32)
        #G_filt[:,:,0] = K
        #G_filt[:,:,1] = K
        #G_filt[:,:,2] = K
        #filterG = tf.convert_to_tensor(G_filt,dtype=tf.float32)
        #filterG = tf.reshape(filterG, [5,5,3,1])
        #G_filt = tf.reshape(K, [5,5,1,1], name='G_filt')
        #G_filt = tf.convert_to_tensor(K, dtype=tf.float32)
        #tf.expand_dims(G_filt,0)
        #tf.expand_dims(G_filt,0)

        tf_org_img = tf.convert_to_tensor(content, dtype=tf.float32)
        tf_org_img = tf.reshape(tf_org_img, tf.shape(image))
        tf_prev_img = tf.convert_to_tensor(prev_content_image,
                                           dtype=tf.float32)
        tf_prev_img = tf.reshape(tf_prev_img, tf.shape(image))
        tf_prev_styl = tf.convert_to_tensor(prev_style_image, dtype=tf.float32)
        tf_prev_styl = tf.reshape(tf_prev_styl, tf.shape(image))

        #smth_org_frame_diff = tf.nn.conv2d(tf_org_img - tf_prev_img,filterG,strides=[1, 1, 1, 1],padding='VALID')
        #smth_styl_frame_diff = tf.nn.conv2d(image - tf_prev_styl,filterG,strides=[1, 1, 1, 1],padding='VALID')
        #org_frame_diff = tf.norm(smth_org_frame_diff)
        #styl_frame_diff = tf.norm(smth_styl_frame_diff)

        org_frame_diff = tf.norm(tf_org_img - tf_prev_img)
        styl_frame_diff = tf.norm(tf_prev_styl - image)
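
        # Continuity loss: penalize deviation of the stylized frame from the
        # previous stylized frame, normalized by how much the content itself
        # changed between frames; the 3*H*W term keeps the denominator away
        # from zero for near-static frames.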

        hyperparam_cl = 10e4
        cl_loss = tf.multiply(
            hyperparam_cl,
            tf.divide(styl_frame_diff, org_frame_diff +
                      3 * content.shape[0] * content.shape[1]))

        # overall loss
        loss = content_loss + style_loss + cl_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('     content loss: %g\n' % content_loss.eval())
            stderr.write('       style loss: %g\n' % style_loss.eval())
            stderr.write('          tv loss: %g\n' % tv_loss.eval())
            stderr.write('  Continuity loss: %g\n' % cl_loss.eval())
            stderr.write('       total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(
                        iteration_times[-10:]) * (iterations - i)
                    stderr.write(
                        'Iteration %4d/%4d (%s elapsed, %s remaining)\n' %
                        (i + 1, iterations, hms(elapsed), hms(remaining)))

                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
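
The continuity term above penalizes change between consecutive stylized frames relative to change between the original frames. A minimal NumPy sketch of the same scalar, assuming float frames in [0, 255] (tf.norm defaults to the Frobenius norm, matching np.linalg.norm with no axis argument):

import numpy as np

def continuity_loss(styled_t, styled_prev, content_t, content_prev, weight=10e4):
    # weight=10e4 (i.e. 1e5) mirrors hyperparam_cl in the snippet.
    styl_diff = np.linalg.norm(styled_t - styled_prev)
    org_diff = np.linalg.norm(content_t - content_prev)
    # The 3*H*W offset keeps the denominator away from zero on static scenes.
    h, w = content_t.shape[:2]
    return weight * styl_diff / (org_diff + 3 * h * w)

# Identical content frames: the ratio is driven by the styled change alone.
frame = np.random.rand(64, 64, 3) * 255
print(continuity_loss(frame, frame * 0.9, frame, frame))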
Example #25
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum
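    # Worked example (illustrative): with five style layers and
    # style_layer_weight_exp = 2.0, the raw weights are 1, 2, 4, 8, 16;
    # their sum is 31, so the normalized weights are 1/31 ... 16/31,
    # biasing the style loss toward the deeper layers.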

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(iteration_times[-10:]) * (iterations - i)
                    stderr.write('Iteration %4d/%4d (%s elapsed, %s remaining)\n' % (
                        i + 1,
                        iterations,
                        hms(elapsed),
                        hms(remaining)
                    ))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
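
Every variant above builds its style loss from Gram matrices of layer activations. A self-contained NumPy sketch of the same computation, with made-up activation shapes:

import numpy as np

# Fake activations for one image: 8x8 spatial grid, 16 filters.
features = np.random.rand(1, 8, 8, 16).astype(np.float32)

# Flatten spatial positions, keep the filter axis, exactly as the snippets do.
flat = features.reshape(-1, features.shape[3])   # (64, 16)
gram = np.matmul(flat.T, flat) / flat.size       # (16, 16)

# Gram matrices are symmetric; they capture filter co-activation statistics
# while discarding where in the image the activations occurred.
assert np.allclose(gram, gram.T)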
Example #26
0
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('Optimization started...')            

    for i in range(ITERATIONS):
        if i % 100 == 0:
            print('Iteration %4d/%4d' % (i + 1, ITERATIONS))
        train_step.run()
        last_step = (i == ITERATIONS - 1)

        if last_step:
            print('  content loss: %g' % content_loss.eval())
            print('    style loss: %g' % style_loss.eval())
            print('       tv loss: %g' % tv_loss.eval())
            print('    total loss: %g' % loss.eval())

            this_loss = loss.eval()
            if this_loss < best_loss:
                best_loss = this_loss
                best = image.eval()

            img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)




#-------------------------------------------------------------
# img_out is a float array whose values can be negative or positive, so clip and convert the data as below before displaying it
img = np.clip(img_out, 0, 255).astype(np.uint8)
plt.imshow(img)
Example #27
0
def stylize(network,
            initial,
            content,
            styles,
            iterations,
            content_weight,
            style_weight,
            style_blend_weights,
            tv_weight,
            learning_rate,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    g = tf.Graph()
    with g.as_default(), g.device('/gpu:0'):
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                                         content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
            if last or (print_iterations and i % print_iterations == 0):
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i, last=last_step)
                train_step.run()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield ((None if last_step else i),
                           vgg.unprocess(best.reshape(shape[1:]), mean_pixel))
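
All of these snippets share the same total-variation denoising term. A plain-NumPy sketch of that formula (TensorFlow's l2_loss(x) is sum(x**2) / 2; the shapes and weight are illustrative):

import numpy as np

def tv_loss(img, tv_weight=1e2):
    # img has shape (1, H, W, C), matching the snippets' image variable.
    l2 = lambda x: np.sum(x ** 2) / 2.0
    dy = img[:, 1:, :, :] - img[:, :-1, :, :]   # vertical neighbour differences
    dx = img[:, :, 1:, :] - img[:, :, :-1, :]   # horizontal neighbour differences
    return tv_weight * 2 * (l2(dy) / dy.size + l2(dx) / dx.size)

print(tv_loss(np.random.rand(1, 32, 32, 3)))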
Example #28
0
    def unprocess(self, image):
        return vgg.unprocess(image, self.mean_pixel)[0]
def optimize(
        content_targets,
        style_target,
        content_weight,
        style_weight,
        tv_weight,
        vgg_path,
        epochs=2,
        print_iterations=1000,
        batch_size=4,
        save_path='saver/fns.ckpt',
        slow=False,
        learning_rate=1e-3,
        debug=False,

        # more cli params
        data_format='NHWC',
        num_base_channels=32):

    #print ("optimize().data_format:{}".format(data_format))
    #print ("optimize().num_base_channels:{}".format(num_base_channels))

    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}

    batch_shape = (batch_size, 256, 256, 3)
    #batch_shape = (batch_size,128,128,3) #mcky,smaller size for MX150
    style_shape = (1, ) + style_target.shape
    #print(style_shape)

    # precompute style features
    print("precompute style features")  #mcky
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        print("precompute content features")  #mcky
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256)
            preds_pre = preds
        else:
            if data_format == 'NHWC':
                #NHWC path
                preds = transform.net(X_content / 255.0,
                                      data_format=data_format,
                                      num_base_channels=num_base_channels)
            else:
                #NCHW path

                # use the NCHW transformer net, but the VGG net needs NHWC, so transposes are needed on input and output.
                #nhwc --> nchw --> transform.net --> nhwc
                x_content = X_content / 255.0
                X_content_nchw = tf.transpose(x_content, [0, 3, 1, 2])
                preds_nchw = transform.net(X_content_nchw,
                                           data_format=data_format,
                                           num_base_channels=num_base_channels)
                preds = tf.transpose(preds_nchw, [0, 2, 3, 1])

                print("preds.shape:{}".format(preds.shape))

            preds_pre = vgg.preprocess(preds)
            print("preds_pre.shape:{}".format(preds_pre.shape))

        net = vgg.net(
            vgg_path, preds_pre
        )  # <-- mcky, feed the ITN output to VGG along with the content batch.

        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        print("style_losses")  #mcky
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add,
                                                     style_losses) / batch_size

        # total variation denoising
        print("total variation denoising")  #mcky

        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size
        '''
        #mcky, tv for preds in nchw format
        tv_y_size = _tensor_size(preds[:,:,1:,:])
        tv_x_size = _tensor_size(preds[:,:,:,1:])
        y_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[1]-1,:])
        x_tv = tf.nn.l2_loss(preds[:,:,:,1:] - preds[:,:,:,:batch_shape[2]-1])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size
        '''

        loss = content_loss + style_loss + tv_loss

        # overall loss
        #print("overall loss") #mcky
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())

        #mcky, Variables are printed here
        #for v in tf.global_variables():
        #    print (v)

        import random
        uid = random.randint(1, 100)
        print(
            "----------------------------------------------------------------------------------------------------"
        )  #mcky
        print("data_format:{}".format(data_format))
        print("num_base_channels:{}".format(num_base_channels))
        print("EPOCHS:{}".format(epochs))  #mcky
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            print("num_examples:{}".format(num_examples))  #mcky
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p,
                                         (256, 256, 3)).astype(np.float32)
                    #X_batch[j] = get_img(img_p, (128,128,3)).astype(np.float32) #mcky, smaller size for MX150

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples

                should_print = is_print_iter or is_last
                #should_print = True#mcky, is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
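
For reference, the NHWC/NCHW round trip in the NCHW branch above boils down to two axis permutations. A quick NumPy check, using the snippet's batch shape:

import numpy as np

x_nhwc = np.zeros((4, 256, 256, 3), dtype=np.float32)  # batch, height, width, channels
x_nchw = x_nhwc.transpose(0, 3, 1, 2)                   # -> (4, 3, 256, 256), as fed to transform.net
back = x_nchw.transpose(0, 2, 3, 1)                     # -> (4, 256, 256, 3), as fed to vgg.net
assert back.shape == x_nhwc.shape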
Example #30
0
def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            grammatrix = np.matmul(features.T, features)
            style_features[layer] = grammatrix

    g = tf.Graph()
    with g.as_default():
        global_step = tf.Variable(0, trainable=False)
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        content_pre = vgg.preprocess(content_image, mean_pixel)
        init = content_pre * (1 - NOISE_RATIO) + noise * NOISE_RATIO
        init = init.astype('float32')
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats)

            style_gram = style_features[i]

            style_losses.append(tf.nn.l2_loss(gram - style_gram) /
                    (4.0 * number ** 2 * (height * width) ** 2))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        loss = ALPHA * content_loss + BETA * style_loss

        learning_rate = tf.train.exponential_decay(LEARNING_RATE_INITIAL,
                global_step, LEARNING_DECAY_STEPS, LEARNING_DECAY_BASE,
                staircase=True)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss,
                global_step=global_step)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(100000):
                print('i = %d' % i)
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()
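
The snippet's tf.train.exponential_decay with staircase=True follows a simple closed form; a small Python sketch with illustrative constants (not the snippet's LEARNING_* values):

import math

def staircase_decay(initial_lr, global_step, decay_steps, decay_base):
    # lr = initial_lr * decay_base ** floor(global_step / decay_steps)
    return initial_lr * decay_base ** math.floor(global_step / decay_steps)

# With initial rate 1.0, base 0.5 and 1000-step stairs:
for step in (0, 999, 1000, 2500):
    print(step, staircase_decay(1.0, step, 1000, 0.5))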
Example #31
0
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None,
        print_image_iterations=False):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if i is not None and i % print_iterations == 0 or last:
                    print('  content loss: %g' % content_loss.eval(), file=stderr)
                    print('    style loss: %g' % style_loss.eval(), file=stderr)
                    print('       tv loss: %g' % tv_loss.eval(), file=stderr)
                    print('    total loss: %g' % loss.eval(), file=stderr)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                print('Iteration %d/%d' % (i + 1, iterations), file=stderr)
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)
                if (i % 100 == 0) and print_image_iterations:
                    temp_image = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    temp_output = 'iteration_' + str(i) + '.jpg'
                    imsave(temp_output, temp_image)
            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image, loss_vals) at every
    iteration. However `image` and `loss_vals` are None by default. Each
    `checkpoint_iterations`, `image` is not None. Each `print_iterations`,
    `loss_vals` is not None.

    `loss_vals` is a dict with loss values for the current iteration, e.g.
    ``{'content': 1.23, 'style': 4.56, 'tv': 7.89, 'total': 13.68}``.

    :rtype: iterator[tuple[int,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # total loss
        loss = content_loss + style_loss + tv_loss

        # We use OrderedDict to make sure we have the same order of loss types
        # (content, style, tv, total) as defined by the initial construction of
        # the loss_store dict. This is important for print_progress() and
        # saving loss_arrs (column order) in the main script.
        #
        # Subtle Gotcha (tested with Python 3.5): The syntax
        # OrderedDict(key1=val1, key2=val2, ...) does /not/ create the same
        # order since, apparently, it first creates a normal dict with random
        # order (< Python 3.7) and then wraps that in an OrderedDict. We have
        # to pass in a data structure which is already ordered. I'd call this a
        # bug, since both constructor syntax variants result in different
        # objects. In 3.6, the order is preserved in dict() in CPython, in 3.7
        # they finally made it part of the language spec. Thank you!
        loss_store = OrderedDict([('content', content_loss),
                                  ('style', style_loss), ('tv', tv_loss),
                                  ('total', loss)])
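        # A quick demonstration of the gotcha (hypothetical, for Python < 3.7):
        #   OrderedDict(a=1, b=2, c=3)         # kwargs go through a plain dict
        #                                      # first, so order may be lost
        #   OrderedDict([('a', 1), ('b', 2)])  # sequence of pairs: order kept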

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Optimization started...')
            if (print_iterations and print_iterations != 0):
                print_progress(get_loss_vals(loss_store))
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(
                        iteration_times[-10:]) * (iterations - i)
                    print('Iteration %4d/%4d (%s elapsed, %s remaining)' %
                          (i + 1, iterations, hms(elapsed), hms(remaining)))
                else:
                    print('Iteration %4d/%4d' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    loss_vals = get_loss_vals(loss_store)
                    print_progress(loss_vals)
                else:
                    loss_vals = None

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))
                else:
                    img_out = None

                yield i + 1 if last_step else i, img_out, loss_vals

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
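
A hedged sketch of how the (iteration, image, loss_vals) protocol above is meant to be consumed; stylize and imsave come from the surrounding snippets, and the checkpoint file name pattern is illustrative:

def run_stylize(**style_args):
    # style_args holds the keyword arguments stylize() expects.
    for iteration, image, loss_vals in stylize(**style_args):
        if loss_vals is not None:
            print('iteration %d: total loss %g' % (iteration, loss_vals['total']))
        if image is not None:
            imsave('checkpoint_%d.jpg' % iteration, image)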
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            content_mask,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    mask_shape = (1, ) + content.shape[0:2] + (1, )
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]
    content_mask_features = {}
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        mask = tf.placeholder('float', shape=mask_shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        net_mask = vgg.net_downsample(vgg_weights, mask)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})
        for layer in CONTENT_LAYERS + STYLE_LAYERS:
            content_mask_features[layer] = net_mask[layer].eval(feed_dict={
                mask:
                np.expand_dims(np.expand_dims(content_mask, axis=0), axis=4)
            })
            # plt.imshow(np.squeeze(content_mask_features[layer]))
            # plt.show()
            # plt.pause(0.01)

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features_bank = sk_image.extract_patches_2d(
                    np.squeeze(features), (kernel_s, kernel_s))
                style_features[i][layer] = [features_bank, features]

    # plt.imshow(np.squeeze(initial).astype(np.uint8))
    # plt.show()
    # plt.pause(0.01)

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            # noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')


            # initial_content_noise_coeff = 1.0 - initial_noiseblend
            # noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            # initial = (initial * initial_content_noise_coeff +
            #            tf.random_normal(shape) * 0.256 * (1.0 - initial_content_noise_coeff))

        # plt.imshow(np.squeeze(initial))
        # plt.show()
        # plt.pause(0.01)

        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        # content loss
        content_layers_weights = {}
        for layer in CONTENT_LAYERS:
            content_layers_weights[layer] = 1. / len(CONTENT_LAYERS)
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            map_ = (net[content_layer] - content_features[content_layer])
            #            map_ = (net[content_layer] - content_features[content_layer])*(1.-content_mask_features[content_layer])
            loss_ = content_layers_weights[content_layer] * content_weight * (
                2 * tf.nn.l2_loss(map_) / content_features[content_layer].size)
            content_losses.append(loss_)
        content_loss += reduce(tf.add, content_losses)
        #        plt.imshow(1-np.squeeze(content_mask_features[content_layer]))
        #        plt.show()
        #        plt.pause(100)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                # Calculate normalized layer
                layer = tf.expand_dims(net[style_layer], axis=4)
                paddings = [[0, 0], [pad, pad], [pad, pad], [0, 0], [0, 0]]
                layer_depth = layer.get_shape().as_list()[3]
                layer_pad = tf.pad(layer, paddings, "CONSTANT")
                layer_norm = tf.sqrt(
                    tf.nn.conv3d(tf.pow(layer_pad, 2),
                                 tf.ones(
                                     (kernel_s, kernel_s, layer_depth, 1, 1),
                                     dtype=tf.float32),
                                 strides=[1, 1, 1, 1, 1],
                                 padding='VALID'))
                # Calculate normalized filter bank
                style_filters = np.transpose(style_features[i][style_layer][0],
                                             (1, 2, 3, 0))
                style_filters = np.expand_dims(style_filters, axis=3)
                style_filters_norm = np.sqrt(
                    np.sum(np.power(style_filters, 2), axis=(0, 1, 2)))
                style_filters_normalized = style_filters / style_filters_norm
                # Calculate normalized correlations
                layer_filtered = tf.nn.conv3d(layer_pad,
                                              style_filters_normalized,
                                              strides=[1, 1, 1, 1, 1],
                                              padding='VALID') / layer_norm
                # Find maximum response and index into the filters
                max_filter_response_idx = tf.squeeze(
                    tf.argmax(layer_filtered, axis=4))
                max_filter_response_idx = tf.reshape(max_filter_response_idx,
                                                     [-1])
                max_filter_response_weight = tf.squeeze(
                    tf.reduce_max(tf.abs(layer_filtered), axis=4))
                max_filter_response_weight = tf.reshape(
                    max_filter_response_weight, [-1])
                max_filter_response_weight = max_filter_response_weight / tf.reduce_max(
                    max_filter_response_weight)
                style_filters_tf = tf.transpose(
                    tf.squeeze(tf.convert_to_tensor(style_filters,
                                                    np.float32)), (3, 0, 1, 2))
                style_filters_tf_gathered = tf.gather(style_filters_tf,
                                                      max_filter_response_idx)
                style_filters_tf_gathered = tf.reshape(
                    style_filters_tf_gathered,
                    (style_filters_tf_gathered.get_shape().as_list()[0], -1))
                layer_patches = tf.extract_image_patches(
                    tf.squeeze(layer_pad, axis=4), [1, kernel_s, kernel_s, 1],
                    [1, 1, 1, 1], [1, 1, 1, 1],
                    padding="VALID")
                layer_size = tf.shape(layer_patches)
                layer_patches = tf.reshape(layer_patches, (-1, layer_size[3]))
                style_weights = np.reshape(content_mask_features[style_layer],
                                           (-1))
                #                loss_ = tf.reduce_mean(tf.reduce_mean(tf.pow(layer_patches-style_filters_tf_gathered, 2),axis=1)*tf.stop_gradient(max_filter_response_weight))
                loss_ = tf.reduce_mean(
                    tf.reduce_mean(tf.pow(
                        layer_patches - style_filters_tf_gathered, 2),
                                   axis=1) * style_weights)
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    loss_)

            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB
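                        # rgb2gray/gray2rgb are helpers not defined in this
                        # listing; a minimal sketch consistent with the Rec.601
                        # weights named above would be:
                        #   def rgb2gray(rgb):
                        #       return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
                        #   def gray2rgb(gray):
                        #       return np.stack([gray] * 3, axis=-1)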

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
Beispiel #34
0
def stylize(style_image,
            content_image,
            alpha,
            beta,
            iterations,
            vgg_path,
            use_avg_pool=False):
    # game plan:
    # precompute Gram matrices for the style layers and features for the content layer
    # build a loss function from squared differences
    # optimize the generated image against that loss
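    # The objective assembled below is the standard Gatys et al. combination,
    #   loss = alpha * L_content + beta * L_style,
    # with L_content a squared feature distance and L_style a squared distance
    # between Gram matrices.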
    style_shape = (1, ) + style_image.shape
    with tf.Graph().as_default(), tf.Session() as sess:
        print("precomputing style grams")
        style_image_placeholder = vgg.preprocess(
            tf.placeholder(tf.float32, shape=style_shape, name='style_image'))
        style_net = vgg.net(vgg_path, style_image_placeholder, use_avg_pool)
        style_grams = {}
        style_pre = np.array([vgg.preprocess(style_image)])
        for style_layer in STYLE_LAYERS:
            features = style_net[style_layer].eval(
                feed_dict={style_image_placeholder: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            # normalize by the feature count so these grams match the
            # size-normalized grams computed in the loss graph below
            style_grams[style_layer] = np.matmul(features.transpose(),
                                                 features) / features.size

        print("precomputing content grams")
        content_shape = (1, ) + content_image.shape
        content_image_placeholder = tf.placeholder(tf.float32,
                                                   shape=content_shape,
                                                   name='content_image')
        content_net = vgg.net(vgg_path, content_image_placeholder,
                              use_avg_pool)
        # NOTE: despite the name, these are raw layer activations (features),
        # not Gram matrices; the content loss below compares them directly.
        content_grams = {}
        content_pre = np.array([vgg.preprocess(content_image)])
        content_grams[CONTENT_LAYER] = content_net[CONTENT_LAYER].eval(
            feed_dict={content_image_placeholder: content_pre})

    with tf.Graph().as_default():
        # White-noise starting image; the 0.256 scale follows the reference
        # neural-style implementation.
        initial_image = tf.random_normal(content_shape) * 0.256
        image = tf.Variable(initial_image)
        net = vgg.net(vgg_path, image, use_avg_pool)

        # Content loss
        # Reference formulation from the original neural-style code, kept for
        # comparison (it differs from the version below only by the
        # content_weight and factor-of-2 scaling):
        # content_weight * (2 * tf.nn.l2_loss(
        #         net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
        #         content_features[CONTENT_LAYER].size)

        loss_content = tf.nn.l2_loss(
            net[CONTENT_LAYER] -
            content_grams[CONTENT_LAYER]) / content_grams[CONTENT_LAYER].size

        # Style Loss

        losses_style = []
        # the style layers are read from `net` above, so no second network is needed

        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            _, height, width, number = map(lambda i: i.value,
                                           layer.get_shape())
            features = tf.reshape(layer, (-1, number))
            size = height * width * number
            gram = tf.matmul(tf.transpose(features), features) / size

            losses_style.append(
                tf.nn.l2_loss(gram - style_grams[style_layer]) /
                style_grams[style_layer].size)

        # tf.add_n sums the per-layer loss tensors (np.sum happens to work on
        # tensors, but this is the idiomatic TensorFlow form)
        loss_style = tf.add_n(losses_style)

        loss = alpha * loss_content + beta * loss_style

        train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

        with tf.Session() as sess:
            # initialize_all_variables() is the deprecated pre-TF1 spelling
            sess.run(tf.global_variables_initializer())
            print("starting training")
            for i in range(iterations):
                last_step = (i == iterations - 1)
                train_step.run()

                if last_step:
                    print("finished")
                    # the optimized image has the content shape, not the style shape
                    return vgg.unprocess(image.eval().reshape(content_shape[1:]))
Beispiel #35
0
def stylize(Ray_render,
            ray_steps,
            reset_opp,
            session,
            network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum
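    # Worked example: with the default style_layer_weight_exp = 1.0 the raw
    # weights are all 1, so after normalization each of the five style layers
    # gets weight 1/5 = 0.2.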

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)
                                ]).astype(np.float32)
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    image = initial - tf.cast(tf.reshape(vgg_mean_pixel,
                                         (1, 1, 1, 3)), tf.float32)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    # content loss
    content_layers_weights = 1 / (1.0 * len(CONTENT_LAYERS))

    content_loss = 0
    content_losses = []
    for content_layer in CONTENT_LAYERS:
        content_losses.append(
            content_layers_weights * content_weight *
            (2 * tf.nn.l2_loss(net[content_layer] -
                               content_features[content_layer]) /
             content_features[content_layer].size))
    content_loss += reduce(tf.add, content_losses)

    # style loss (left at zero in this ray-render variant)
    style_loss = 0
    # overall loss (the tv loss is likewise omitted here)
    loss = content_loss + style_loss  #+ tv_loss

    # optimizer setup
    render_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='parameters')
    with tf.variable_scope('OPTIMIZATION', reuse=tf.AUTO_REUSE):
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(
                                                loss, var_list=render_vars)
    session.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated
    Ray_render.trace(session, ray_steps, reset_opp, num_steps=50)

    def print_progress():
        stderr.write('  content loss: %g\n' %
                     content_loss.eval(session=session))
        # style loss is zero in this variant, so its line stays commented out:
        # stderr.write('    style loss: %g\n' % style_loss.eval(session=session))
        stderr.write('    total loss: %g\n' % loss.eval(session=session))

    print_progress()

    # optimization
    stderr.write('Optimization started...\n')
    if print_iterations:
        print_progress()
    for i in range(iterations):
        stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))

        train_step.run(session=session)
        Ray_render.trace(session, ray_steps, reset_opp, num_steps=50)
        last_step = (i == iterations - 1)
        print_progress()

        if (checkpoint_iterations
                and i % checkpoint_iterations == 0) or last_step:
            image_ = image.eval(session=session)
            img_out = vgg.unprocess(image_.reshape(shape[1:]), vgg_mean_pixel)

            if preserve_colors:
                original_image = np.clip(content, 0, 255)
                styled_image = np.clip(img_out, 0, 255)

                # Luminosity transfer steps:
                # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                # 2. Convert stylized grayscale into YUV (YCbCr)
                # 3. Convert original image into YUV (YCbCr)
                # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                # 5. Convert recombined image from YUV back to RGB

                # 1
                styled_grayscale = rgb2gray(styled_image)
                styled_grayscale_rgb = gray2rgb(styled_grayscale)

                # 2
                styled_grayscale_yuv = np.array(
                    Image.fromarray(styled_grayscale_rgb.astype(
                        np.uint8)).convert('YCbCr'))

                # 3
                original_yuv = np.array(
                    Image.fromarray(original_image.astype(
                        np.uint8)).convert('YCbCr'))

                # 4
                w, h, _ = original_image.shape
                combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                combined_yuv[..., 1] = original_yuv[..., 1]
                combined_yuv[..., 2] = original_yuv[..., 2]

                # 5
                img_out = np.array(
                    Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

            yield ((None if last_step else i), img_out)
Beispiel #36
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None,
        vgg_weights=None, vgg_mean_pixel=None, # Added so that they are not reloaded every time
        content_features=None): # Added so that they are not recomputed every time
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    style_features = [{} for _ in styles]

    # Added option to have the net pre-loaded before calling the method
    if vgg_weights is None or vgg_mean_pixel is None:
        vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # Jacob: These content features only need to be computed once, and can be reused for
    #        each new style image.
    # compute content features in feedforward mode
    if content_features is None:
        content_features = {}
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=shape)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
            for layer in CONTENT_LAYERS:
                content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram
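    # Each stored gram is G = F^T F / F.size, where F has one row per spatial
    # position and one column per channel; G has shape (channels, channels)
    # and captures channel co-activation statistics independent of layout.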

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            initial = initial * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
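        # In the branch above, initial = c * initial + (1 - c) * noise with
        # c = initial_content_noise_coeff = 1 - initial_noiseblend, so the
        # default initial_noiseblend of 0 keeps the preprocessed image as-is.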
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
Beispiel #37
0
def stylize(network, initial, initial_noiseblend, content, styles, matte,
        preserve_colors, iterations, content_weight, content_weight_blend,
        style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        matte_weight, learning_rate, beta1, beta2, epsilon, pooling,
        output, dest_txt, dest_fig,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded       
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape                               # prepend a 1 as the first dimension of content
    style_shapes = [(1,) + style.shape for style in styles]    # same for the style images
    content_features = {}                                      # create the dict
    style_features = [{} for _ in styles]                      # one dict per style image

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight  # => relu1_1: 1 ; relu2_1: style_layer_weight_exp ; ... ; relu5_1: style_layer_weight_exp**4
        layer_weight *= style_layer_weight_exp            # (default style_layer_weight_exp=1 => all ones)

    # normalize style layer weights => sum = 1
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum    # => a normalized set of 5 weights shared by every style image

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:   # every op built in this (indented) context lives on CPU:0 inside graph g;
                                                                     # "with Session" closes the session when done
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)         # dict mapping each VGG19 layer name to a rank-4 tensor
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])   # returns the matrix styles[i] - vgg_mean_pixel
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size   # gram = (features^T x features) / features.size
                style_features[i][layer] = gram                          # style_features = list of dicts

    initial_content_noise_coeff = 1.0 - initial_noiseblend     # noiseblend = optional input

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:                                                      # initial = the image we start from to build the next one
            initial = tf.random_normal(shape) * 0.256                            # no initial given => start from random noise
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])        # initial - mean_pixel
            initial = initial.astype('float32')
        initial = initial * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        # (default initial_noiseblend=0 => initial unchanged)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)


        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend        # default content_weight_blend = 1  => ['relu4_2'] = 1
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend  # => ['relu5_2'] = 0

        content_loss = 0          # initialization is redundant, but keeps the same pattern as the style loss
        content_losses = []
        for content_layer in CONTENT_LAYERS:              # CONTENT_LAYERS = ('relu4_2', 'relu5_2')
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(             # content_weight = alpha/2
                    net[content_layer] - content_features[content_layer]) / content_features[content_layer].size))         # content_losses = list of 2 terms
                    # net[content_layer] = features of the generated image ; content_features[content_layer] = features of the original image
        content_loss += reduce(tf.add, content_losses)       # = sum of content_losses (per-layer errors, then summed)
        # (default content_layers_weights['relu5_2'] = 0 => content_loss = content_losses[0])

        # style loss
        style_loss = 0
        for i in range(len(styles)):       # number of style images
            style_losses = []
            for style_layer in STYLE_LAYERS:             # STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
                layer = net[style_layer]
                _, height, width, number = map(lambda j: j.value, layer.get_shape())   # "_" => discard the first elt of the tuple
                        # the lambda maps j to j.value; map applies it to every elt of layer.get_shape()
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))      # merges dim0 (=1), dim1 and dim2 into the first axis => shape = (dim1*dim2, number)
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]  # style_features = list of dicts built in "compute style features in feedforward mode"
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)  # errors of all layers for style image i
                # gram = style representation of the generated image ; style_gram = style representation of the style image
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
            # style_loss accumulation: reduce = sum of the layer errors for image i ;
            # style_weight = weight of style relative to content ;
            # style_blend_weights[i] = weight of style image i relative to the others ;
            # += sums the losses over all style images

        # matting Laplacian loss
        loader = np.load(matte)
        lcoo = csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                        shape=loader['shape']).tocoo()
        lindices = np.mat([lcoo.row, lcoo.col]).transpose()
        lvalues = tf.constant(lcoo.data,  dtype=tf.float32)
        laplacian = tf.SparseTensor(indices=lindices, values=lvalues, dense_shape=lcoo.shape)

        matte_loss = 0
        matte_losses = []
        for i in range(3):
            imr = tf.reshape(image[:,:,:,i], [-1, 1])
            matte_losses.append(
                tf.matmul(tf.transpose(imr),
                          tf.sparse_tensor_dense_matmul(laplacian, imr))[0][0]
            )
        matte_loss += matte_weight * reduce(tf.add, matte_losses)
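        # Each term above is the quadratic form imr^T L imr over one flattened
        # color channel, i.e. a matting-Laplacian (photorealism) regularizer;
        # matte_loss sums it across the three channels, scaled by matte_weight.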


        # total variation denoising (not critical: could be replaced by another
        # loss; it can be disabled entirely with --tv-weight 0)
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))


        # GAN loss (placeholder: not implemented)

        # overall loss
        loss = content_loss + style_loss + matte_loss + tv_loss    # the relative weights (alpha etc.) are already folded into each term

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)       # op that updates the variables so the total loss decreases
        # (by default minimize() optimizes every trainable variable, here just the image)
        
        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            # stderr.write('    matte loss: %g\n' % matte_loss.eval())
            # GAN_loss is never defined (the GAN loss above is only a
            # placeholder), so its line must stay commented out:
            # stderr.write('      GAN loss: %g\n' % GAN_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())
        
        
        
        # optimization
        best_loss = float('inf')                          # best (lowest) total loss seen so far
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())    # initialize the global variables
            stderr.write('Optimization started...\n')
            if print_iterations:     # if a print interval was given, report the initial losses before the first iteration
                print_progress()

            c_loss = []                # lists of loss values, one entry per iteration
            s_loss = []
            t_loss = []
            tot_loss = []
            x = [i + 1 for i in range(iterations)]   # x-axis values for the plots
            
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()                            # minimize the loss at every iteration

                c_loss.append(content_loss.eval())          # record each loss value at every iteration
                s_loss.append(style_loss.eval())
                t_loss.append(tv_loss.eval())
                tot_loss.append(loss.eval())

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    # report the instantaneous losses every print_iterations steps
                    print_progress()
                    if last_step:
                        # write the final losses to a text file (same name as the
                        # output but with a .txt extension, unless dest_txt is given)
                        if dest_txt is None:
                            dest_txt = output[:len(output) - 4] + '.txt'
                        # 'x' mode raises if the file already exists; the
                        # with-block closes the file properly
                        with open(dest_txt, 'x') as F:
                            F.writelines(['  content loss: %g\n' % content_loss.eval(),
                                          '    style loss: %g\n' % style_loss.eval(),
                                          '       tv loss: %g\n' % tv_loss.eval(),
                                          '    total loss: %g\n' % loss.eval()])
                        
                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()            # keep the image that achieved the best total loss

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)        # clip: elements > 255 -> 255 and < 0 -> 0
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
            
            
            # destination of the loss-curve figure
            if dest_fig is None:
                dest_fig = output[:len(output) - 4] + '_fig.jpg'

            print('dest_fig', dest_fig)

            # plot the loss curves
            plt.figure(1)
            plt.title("Loss components - linear and semi-log scales")
            plt.subplot(2,1,1)
            plt.plot(x, c_loss, label='content_loss')
            plt.plot(x, s_loss, label='style_loss')
            plt.plot(x, t_loss, label='tv_loss')
            plt.plot(x, tot_loss, label='total_loss')
            plt.grid('on')
            plt.axis('tight')
            plt.legend()
            plt.ylabel('error')
            
            plt.subplot(2,1,2)
            plt.semilogy(x, c_loss, label='content_loss')
            plt.semilogy(x, s_loss, label='style_loss')
            plt.semilogy(x, t_loss, label='tv_loss')
            plt.semilogy(x, tot_loss, label='total_loss')
            plt.grid('on')
            plt.axis('tight')                         
            plt.xlabel("i (Nombre d'itérations)")
            plt.ylabel('erreur')
            plt.savefig(dest_fig)
Beispiel #38
0
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod] 

    style_features = {}

    batch_shape = (batch_size,256,256,3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image:style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            # Add sin(k1*x + k2*y + phi) as 3 extra channels.
            # In sess.run, feed_dict must also supply "phi".
            k = tf.get_variable("K", [2, 3], tf.float32,
                                tf.random_normal_initializer(stddev=0.02))
            phi = tf.placeholder(tf.float32, [3], name="random_phase_offset")
            # pass batch_size through rather than hardcoding 4, so the noise
            # batch matches X_content even when slow mode sets batch_size=1
            p_noise = transform.periodic_noise(k=k, phi=phi, dp=3,
                                               batch_size=batch_size)
            X_input = X_content / 255.0
            X_input = tf.concat((X_input, p_noise), 3)  # X_input range (0~1), p_noise range (-1~1)
            preds = transform.net(X_input)
            preds_pre = vgg.preprocess(preds)
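            # transform.periodic_noise is external to this listing; a plausible
            # reading (an assumption, not its actual implementation) is that for
            # pixel coordinates (x, y) it produces
            #   p_noise[b, y, x, c] = sin(k[0, c] * x + k[1, c] * y + phi[c])
            # i.e. dp=3 phase-shifted sinusoidal channels per batch element.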

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER])*batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i:i.value,layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0,2,1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)  # random run id used to tag debug output
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                   X_batch[j] = get_img(img_p, (256,256,3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size
                # 3 phase offsets, one per periodic-noise channel (matching the
                # dp=3 passed to periodic_noise above); the original `self.dp`
                # could not resolve here, since there is no `self` in this function
                batch_phi = np.random.uniform(0, np.pi * 2, size=(3,))
                feed_dict = {
                   X_content: X_batch,
                   phi: batch_phi
                }

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                       X_content: X_batch,
                       phi: batch_phi
                    }

                    tup = sess.run(to_get, feed_dict = test_feed_dict)
                    _style_loss,_content_loss,_tv_loss,_loss,_preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                       _preds = vgg.unprocess(_preds)
                    else:
                       saver = tf.train.Saver()
                       res = saver.save(sess, save_path)
                    yield(_preds, losses, iterations, epoch)
Beispiel #39
0
def stylize(
    network,
    initial,
    content,
    style,
    iterations,
    content_weight,
    style_weight,
    tv_weight,
    learning_rate,
    print_iter=None,
):
    shape = (1,) + content.shape
    style_shape = (1,) + style.shape
    content_features = {}
    style_features = {}

    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=style_shape)
        net, _ = vgg.net(network, image)
        style_pre = np.array([vgg.preprocess(style, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        if initial is None:
            # start from scaled white noise (256 / 1000 == 0.256)
            initial = tf.random_normal(shape) * 256 / 1000
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype("float32")
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        content_loss = tf.nn.l2_loss(net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            # name the lambda parameter d so it does not shadow the loop
            # variable i (the layer name used below)
            _, height, width, number = map(lambda d: d.value, layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        tv_loss = tf.nn.l2_loss(image[:, 1:, :, :] - image[:, : shape[1] - 1, :, :]) + tf.nn.l2_loss(
            image[:, :, 1:, :] - image[:, :, : shape[2] - 1, :]
        )
        loss = content_weight * content_loss + style_weight * style_loss + tv_weight * tv_loss

        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated
            for i in range(iterations):
                if print_iter is not None and i % print_iter == 0:
                    print("  content loss: %g" % content_loss.eval())
                    print("    style loss: %g" % style_loss.eval())
                    print("       tv loss: %g" % tv_loss.eval())
                    print("    total loss: %g" % loss.eval())
                print("Iteration %d/%d" % (i + 1, iterations))
                train_step.run()
            return vgg.unprocess(image.eval().reshape(shape[1:]), mean_pixel)