Example #1
    loss_discrim = -tf.reduce_sum(
        discrim_target *
        tf.log(tf.clip_by_value(discrim_predictions, 1e-10, 1.0))
    )  # Note: use tf.reduce_sum here, not tf.reduce_mean
    loss_texture = -loss_discrim

    correct_predictions = tf.equal(tf.argmax(discrim_predictions, 1),
                                   tf.argmax(discrim_target, 1))
    discim_accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

    # 2) content loss

    CX_LAYER = 'conv4_2'

    enhanced_vgg = vgg.net(vgg_dir, vgg.preprocess(enhanced * 255))
    dslr_vgg = vgg.net(vgg_dir, vgg.preprocess(dslr_image * 255))

    # SSIM loss
    ssim_loss = 25 * (1 - utils.ssim(dslr_image, enhanced) / batch_size)

    # CX loss
    cx_loss = 4 * CX_loss_helper(dslr_vgg[CX_LAYER], enhanced_vgg[CX_LAYER],
                                 config_CX)

    # content loss
    loss_content = ssim_loss + cx_loss

    # 3) color loss

    enhanced_blur = utils.blur(enhanced)
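    # The snippet breaks off here. In DPED-style enhancement pipelines the
    # color loss that follows is typically the squared distance between
    # Gaussian-blurred versions of the two images - a hedged sketch, not
    # part of the original:
    #
    #   dslr_blur = utils.blur(dslr_image)
    #   loss_color = tf.reduce_sum(tf.pow(dslr_blur - enhanced_blur, 2)) / (2 * batch_size)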
Example #2
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            exp_sigma,
            mat_sigma,
            mat_rho,
            text_to_print,
            print_iterations=None,
            checkpoint_iterations=None,
            kernel=3,
            d=2,
            gamma_rho=1,
            gamma=1,
            rational_rho=1,
            alpha=1):

    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]

    Kernel codes (the `kernel` argument):
    0 - dot product kernel
    1 - exponential (squared-exponential) kernel
    2 - Matern kernel
    3 - polynomial kernel
    4 - gamma-exponential kernel
    5 - rational quadratic kernel
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))

                if (kernel == 0):
                    gram2 = np.matmul(features.T, features) / features.size
                elif (kernel == 1):
                    gram2 = gramSquaredExp_np(
                        features,
                        exp_sigma) / features.size  # exponential kernel
                elif (kernel == 2):
                    # NOTE: `v` (the Matern smoothness) is not a parameter of
                    # this function; it must be defined at module level
                    gram2 = gramMatten_np(
                        features, mat_sigma, v,
                        mat_rho) / features.size  # Matern kernel
                elif (kernel == 3):
                    gram2 = gramPoly_np(features, C=0, d=d) / features.size
                elif (kernel == 4):
                    gram2 = gramGammaExp_np(features, gamma_rho,
                                            gamma) / features.size
                elif (kernel == 5):
                    gram2 = gramRatioanlQuad_np(features, rational_rho,
                                                alpha) / features.size

                style_features[i][layer] = gram2

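    # The kernel helpers (gramSquaredExp_np, gramMatten_np, gramPoly_np,
    # gramGammaExp_np, gramRatioanlQuad_np) are not shown in this snippet.
    # Judging from the TF branch further down, the squared-exponential one
    # is presumably something like:
    #
    #   def gramSquaredExp_np(features, sigma):
    #       # features: (positions, channels); kernel between channel vectors
    #       sq = np.sum(features ** 2, axis=0)
    #       d2 = sq[:, None] + sq[None, :] - 2 * np.matmul(features.T, features)
    #       return np.exp(-np.maximum(d2, 0) / (2 * sigma ** 2))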
    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    g = tf.Graph()
    with g.as_default(), g.device('/gpu'):
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))

                style_gram = style_features[i][style_layer]

                dim = feats.get_shape()
                # print(dim)

                sqr = tf.reduce_sum(tf.transpose(feats) * tf.transpose(feats),
                                    axis=1)
                d2 = tf.nn.relu(
                    tf.transpose(tf.ones([dim[1], dim[1]]) * sqr) +
                    tf.ones([dim[1], dim[1]]) * sqr -
                    2 * tf.matmul(tf.transpose(feats), feats))

                if (kernel == 0):
                    gram = (tf.matmul(tf.transpose(feats), feats)) / size
                elif (kernel == 1):
                    gram = tf.exp(
                        -1 * (tf.transpose(tf.ones([dim[1], dim[1]]) * sqr) +
                              tf.ones([dim[1], dim[1]]) * sqr -
                              2 * tf.matmul(tf.transpose(feats), feats)) / 2 /
                        (exp_sigma * exp_sigma)) / size  # exponential kernel
                elif (kernel == 2):
                    # Matern kernel; `v` (the smoothness) is assumed to be a
                    # module-level constant, typically 0.5, 1.5, or 2.5
                    if (v == 0.5):
                        gram = mat_sigma**2 * tf.exp(
                            -1 * tf.sqrt(d2) / mat_rho) / size
                    elif (v == 1.5):
                        gram = mat_sigma**2 * (tf.ones([
                            dim[1], dim[1]
                        ]) + tf.sqrt(3.0) * tf.sqrt(d2) / mat_rho) * tf.exp(
                            -1 * tf.sqrt(3.0) * tf.sqrt(d2) / mat_rho) / size
                    elif (v == 2.5):
                        gram = mat_sigma**2 * (
                            tf.ones([dim[1], dim[1]]) +
                            tf.sqrt(5.0) * tf.sqrt(d2) / mat_rho + 5 * d2 / 3 /
                            (mat_rho**2)) * tf.exp(
                                -1 * tf.sqrt(5.0) * tf.sqrt(d2) /
                                mat_rho) / size
                elif (kernel == 3):
                    # polynomial kernel
                    gram = (tf.matmul(tf.transpose(feats), feats))**d / size
                elif (kernel == 4):
                    # gamma-exponential kernel
                    gram = tf.exp(-1 * (tf.sqrt(d2) / gamma_rho)**gamma) / size
                elif (kernel == 5):
                    # rational quadratic kernel
                    gram = (1 +
                            (d2 / rational_rho**2 / 2 / alpha))**(-1 *
                                                                  alpha) / size

                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)

            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])

        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        # train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress(last_loss):
            new_loss = loss.eval()
            stderr.write('file ===>  %s \n' % text_to_print)
            stderr.write('  content loss: %1.3e \t' % content_loss.eval())
            stderr.write('    style loss: %1.3e \t' % style_loss.eval())
            stderr.write('       tv loss: %1.3e \t' % tv_loss.eval())
            stderr.write('    total loss: %1.3e \t' % new_loss)
            stderr.write('    loss difference: %1.3e \t\n' %
                         (last_loss - new_loss))
            return new_loss

        def save_progress():
            loss_dict = {
                "content loss": content_loss.eval(),
                "style loss": style_loss.eval(),
                "tv loss": tv_loss.eval(),
                "total loss": loss.eval()
            }
            return loss_dict

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            new_loss = 0
            # if (print_iterations and print_iterations != 0):
            #     print_progress()
            for i in range(iterations):
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    new_loss = print_progress(new_loss)

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    loss_dict = save_progress()
                    this_loss = loss.eval()
                    print(this_loss, "loss at this checkpoint")
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    try:
                        img_out = vgg.unprocess(best.reshape(shape[1:]),
                                                vgg_mean_pixel)
                    except Exception:
                        print("unable to reconstruct the result image with "
                              "the given parameters")
                        img_out = "no image"

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out, loss_dict)
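The luminosity-transfer branch above assumes rgb2gray / gray2rgb helpers that the snippet never defines; in the classic neural-style code they are plain Rec.601 conversions, roughly (a sketch, not part of the snippet):

import numpy as np

def rgb2gray(rgb):
    # Rec.601 luma weights
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

def gray2rgb(gray):
    w, h = gray.shape
    rgb = np.empty((w, h, 3), dtype=np.float32)
    rgb[:, :, 2] = rgb[:, :, 1] = rgb[:, :, 0] = gray
    return rgb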
Example #3
    def train(self):
        with tf.Session() as sess:
            out_im = self.U_net(self.holder[41] / 127.5 - 1)

            gt_resize = tf.image.resize_images(self.holder[42] / 127.5 - 1,
                                               [256, 256])
            image_pre = vgg.preprocess(gt_resize)
            fai_imgt = {}
            net = vgg.net(self.vgg_path, image_pre)
            for layer in self.vgg_layer:
                fai_imgt[layer] = net[layer]

            image_pre = vgg.preprocess(
                tf.image.resize_images(out_im, [256, 256]))
            fai_imout = {}
            net = vgg.net(self.vgg_path, image_pre)
            for layer in self.vgg_layer:
                fai_imout[layer] = net[layer]

            # composite: keep ground-truth pixels where the mask is 1, fill
            # the holes with the network output rescaled from [-1, 1] to [0, 255]
            Im_compt = self.holder[16] * self.holder[42] + \
                (1 - self.holder[16]) * ((out_im + 1) * 127.5)
            im_compt = tf.image.resize_images(Im_compt / 127.5 - 1, [256, 256])
            image_pre = vgg.preprocess(im_compt)
            fai_compt = {}
            net = vgg.net(self.vgg_path, image_pre)
            for layer in self.vgg_layer:
                fai_compt[layer] = net[layer]

            U_vars = [
                var for var in tf.trainable_variables() if 'UNET' in var.name
            ]
            total_loss = get_total_loss(out_im, self.holder[-1] / 127.5 - 1,
                                        self.holder[16], fai_imout, fai_imgt,
                                        fai_compt, self.vgg_layer, im_compt)
            optim = tf.train.AdamOptimizer()
            optimizer = optim.minimize(total_loss[0], var_list=U_vars)

            init_group = tf.group(tf.global_variables_initializer(),
                                  tf.local_variables_initializer())
            sess.run(init_group)

            graph = tf.summary.FileWriter(self.logdir, sess.graph)
            saver = tf.train.Saver(U_vars, max_to_keep=20)

            for epoch in range(self.num_epochs):
                for imid in range(int(self.total_ims // self.batch)):
                    mask_ims, gt_ims = get_im(self.ims_dir, imid)
                    self.get_all_mask(mask_ims, gt_ims)
                    feed_dic = get_feedict(self.all_masks, self.holder)
                    _, loss_total = sess.run([optimizer, total_loss],
                                             feed_dict=feed_dic)

                    if (int(epoch * self.total_ims) + imid) % 1 == 0:  # '% 1' is always 0, so this logs every step
                        print(
                            'epoch: %d,  cur_num: %d,  total_loss: %f, l_hole: %f, l_valid: %f, percept_loss: %f, style_loss_out: %f, style_loss_comp: %f, tv_loss: %f'
                            % (epoch, imid, loss_total[0], loss_total[1],
                               loss_total[2], loss_total[3], loss_total[4],
                               loss_total[5], loss_total[6]))
                if epoch % 5 == 0:
                    saver.save(sess,
                               self.save_path + 'model.ckpt',
                               global_step=epoch)
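The compositing step above (Im_compt) keeps ground-truth pixels where the mask is 1 and fills the holes with the network output. A minimal NumPy illustration of the same identity, with hypothetical data (not part of the snippet):

import numpy as np

mask = np.random.randint(0, 2, (4, 4, 1)).astype(np.float32)  # 1 = known pixel
gt = np.random.rand(4, 4, 3).astype(np.float32)               # ground truth
out = np.random.rand(4, 4, 3).astype(np.float32)              # network output

composite = mask * gt + (1 - mask) * out  # known pixels from gt, holes from out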
Example #4
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/gpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
            if last or (print_iterations and i % print_iterations == 0):
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i, last=last_step)
                train_step.run()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield (
                        (None if last_step else i),
                        vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    )
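_tensor_size is used by several of these snippets but never shown; in the original neural-style code it is simply the product of a tensor's static dimensions, roughly:

from functools import reduce  # a builtin in Python 2
from operator import mul

def _tensor_size(tensor):
    # product of the static (graph-time) dimensions of a TF1 tensor
    return reduce(mul, (d.value for d in tensor.get_shape()), 1)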
Example #5
def optimize(content_targets,
             style_targets,
             content_weight,
             style_weight,
             tv_weight,
             vgg_path,
             epochs=2,
             print_iterations=1,
             batch_size=4,
             save_path='saver/fns.ckpt',
             slow=False,
             learning_rate=1e-3,
             debug=False,
             save_checkpoint=False,
             restore_checkpoint_path=None):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    # style_features = collections.defaultdict()
    style_features = []

    batch_shape = (batch_size, 256, 256, 5)
    style_shape = (1, 256, 256, 3)
    # print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        # with tf.Graph().as_default(), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)

        for i in range(len(style_targets)):
            index = 0
            style_pre = np.array([style_targets[i]])
            # current_style_feature = []
            current_style_feature = np.array([])

            for layer in STYLE_LAYERS:
                # print(layer)
                features = net[layer].eval(feed_dict={style_image: style_pre})
                # print(features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                # print(features.shape)
                # gram = np.matmul(features.T, features) / features.size
                gram = np.matmul(features.T - np.mean(features.T),
                                 features - np.mean(features)) / features.size

                # print(gram.shape)

                if not STYLE_LAYERS_SHAPE[STYLE_LAYERS.index(layer)]:
                    STYLE_LAYERS_SHAPE[STYLE_LAYERS.index(layer)] = gram.shape

                if not STYLE_LAYERS_SIZE[STYLE_LAYERS.index(layer)]:
                    STYLE_LAYERS_SIZE[STYLE_LAYERS.index(layer)] = gram.size

                if STYLE_LAYERS_INDEX[STYLE_LAYERS.index(layer)] == -1:
                    STYLE_LAYERS_INDEX[STYLE_LAYERS.index(layer)] = index

                index = index + gram.size

                # style_features[i][layer] = gram
                # current_style_feature.append(gram.tolist())
                # current_style_feature.append(gram.reshape(-1))
                current_style_feature = np.append(current_style_feature,
                                                  gram.reshape(-1))

            # style_features.append(np.array(current_style_feature).reshape(-1))
            style_features.append(current_style_feature)

        style_features = np.array(style_features, dtype=np.float32)
        # tf.convert_to_tensor(style_features)

    with tf.Graph().as_default(), tf.Session() as sess:
        lambda_style = tf.placeholder(tf.float32, name="lambda_style")
        style_id = tf.placeholder(tf.int32, name="style_id")
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name="X_content")

        X_pre = vgg.preprocess(X_content[:, :, :, 0:3])

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256)
            preds_pre = preds
        else:
            preds = transform.net(X_content / 255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])

        # content_loss = (1 - lambda_style) * (2 * tf.nn.l2_loss(
        #     net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        # )

        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size
        )  # original

        style_losses = []
        for style_layer in STYLE_LAYERS:
            # print(style_layer)
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            # grams = tf.matmul(feats_T, feats) / size
            grams = tf.matmul(feats_T - tf.reduce_mean(feats_T),
                              feats - tf.reduce_mean(feats)) / size

            # (Several commented-out attempts at fetching the per-style Gram
            # matrix inside the graph - eval() on the placeholder, a plain
            # dict lookup, and a tf.contrib.lookup.HashTable - preceded the
            # tf.gather_nd approach used below.)
            style_index = STYLE_LAYERS.index(style_layer)
            style_grams = tf.gather_nd(tf.constant(style_features), [style_id])
            style_gram = style_grams[STYLE_LAYERS_INDEX[style_index]:
                                     STYLE_LAYERS_INDEX[style_index] +
                                     STYLE_LAYERS_SIZE[style_index]]
            style_gram = tf.reshape(style_gram,
                                    STYLE_LAYERS_SHAPE[style_index])
            # style_grams = style_features[style_id]

            # style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                STYLE_LAYERS_SIZE[style_index])

        style_loss = lambda_style * functools.reduce(tf.add,
                                                     style_losses) / batch_size
        # style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size  # original

        # total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)

        if restore_checkpoint_path:
            print('Restoring checkpoint from : ' + restore_checkpoint_path)
            saver = tf.train.Saver()
            saver.restore(sess, restore_checkpoint_path)

        for epoch in range(epochs):
            if save_checkpoint and epoch > 0:
                saver = tf.train.Saver()
                head, tail = os.path.split(save_path)

                cp_dir = os.path.join(head, str(epoch))

                if not os.path.exists(cp_dir):
                    os.makedirs(cp_dir)

                cp_path = os.path.join(cp_dir, tail)
                print('save checkpoint to : ' + cp_path)
                res = saver.save(sess, cp_path)

            print('epoch: {}'.format(epoch))
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                # print('iterations: {}'.format(iterations))
                start_time = time.time()
                curr = iterations * batch_size

                curr_lambda_style = np.random.randint(1, 100) * 1.0
                curr_lambda_style_img = np.ones(
                    (256, 256, 1)) * curr_lambda_style

                curr_style_id = np.random.randint(
                    len(style_targets)) if epoch > 0 else 0
                # print('\ncurr_style_id:')
                # print(type(curr_style_id))
                # print(curr_style_id)
                curr_style_channel = np.ones((256, 256, 1)) * curr_style_id

                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    try:
                        curr_img = get_img(img_p,
                                           (256, 256, 3)).astype(np.float32)
                    except Exception:
                        continue

                    X_batch[j, :, :, 0:3] = curr_img
                    X_batch[j, :, :, 3:] = curr_lambda_style_img
                    X_batch[j, :, :, 4:] = curr_style_channel

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {
                    X_content: X_batch,
                    lambda_style: curr_lambda_style,
                    style_id: curr_style_id
                }

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))

                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples

                should_print = is_print_iter or is_last

                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                        X_content: X_batch,
                        lambda_style: 80.0,
                        style_id: 0  # np.random.randint(1, 10) / 10.0
                    }

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    # print(losses)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)

                    yield (_preds, losses, iterations, epoch)
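Unlike the other examples, Example #5 mean-centers the activations before taking the Gram product, both in the NumPy precompute and in the TF branch. A minimal NumPy sketch of that centered variant:

import numpy as np

def centered_gram(features):
    # features: (positions, channels) activations for one layer; the scalar
    # global mean is subtracted before the Gram product, as in the snippet
    f = features - np.mean(features)
    return np.matmul(f.T, f) / features.size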
Example #6
def main():
    parser = build_parser()
    options, unknown = parser.parse_known_args()
    env = os.environ.copy()

    print("options: ", options)
    vgg_path = options.dataset + '/vgg/imagenet-vgg-verydeep-19.mat'
    model_name = options.style_image.replace('.jpg', '.ckpt')
    style_image = options.dataset + '/style_images/' + options.style_image
    training_path = options.dataset + '/train'
    model_dir = env.get("OUTPUT_DIR", options.ckpt)
    tensorboard_dir = env.get("LOG_DIR", options.dataset)

    print("style_image: ", style_image)
    print("vgg: ", vgg_path)
    print("trainingpath: ", training_path)
    print("modelname: ", model_name)

    

    if options.gpu == None:
        available_gpus = get_available_gpus()
        if len(available_gpus) > 0:
            device = '/gpu:0'
        else:
            device = '/cpu:0'
    else:
        if options.gpu > -1:
            device = '/gpu:{}'.format(options.gpu)
        else:
             device = '/cpu:0'

    batchsize = options.batchsize

    # content targets
    content_targets = [os.path.join(training_path, fn) for fn in list_files(training_path)]
    if len(content_targets) % batchsize != 0:
        content_targets = content_targets[:-(len(content_targets) % batchsize)]

    print('total training data size: ', len(content_targets))
    batch_shape = (batchsize,224,224,3)

    # style target
    style_target = read_img(style_image)
    style_shape = (1,) + style_target.shape

    with tf.device(device), tf.Session() as sess:

        # style target feature
        # compute gram maxtrix of style target
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        vggstyletarget = vgg.net(vgg_path, vgg.preprocess(style_image))
        style_vgg = vgg.get_style_vgg(vggstyletarget, style_image, np.array([style_target]))        

        # content target feature 
        content_vgg = {}
        inputs = tf.placeholder(tf.float32, shape=batch_shape, name="inputs")
        content_net = vgg.net(vgg_path, vgg.preprocess(inputs))
        content_vgg['relu4_2'] = content_net['relu4_2']

        # feature after transformation 
        outputs = stylenet.net(inputs/255.0)        
        vggoutputs = vgg.net(vgg_path, vgg.preprocess(outputs))

        # compute feature loss
        loss_f = options.lambda_feat * vgg.total_content_loss(vggoutputs, content_vgg, batchsize)

        # compute style loss        
        loss_s = options.lambda_style * vgg.total_style_loss(vggoutputs, style_vgg, batchsize)
        
        # total variation denoising
        loss_tv = options.lambda_tv * vgg.total_variation_regularization(outputs, batchsize, batch_shape)
        
        # total loss
        loss = loss_f + loss_s + loss_tv

        
    # note: this second Session runs on the same default graph built above,
    # so the ops and variables defined there remain visible here
    with tf.Session() as sess:
                
        if not os.path.exists(options.ckpt):
            os.makedirs(options.ckpt)

        save_path = model_dir + '/' + model_name

        # training
        train_step = tf.train.AdamOptimizer(options.lr).minimize(loss)
        sess.run(tf.global_variables_initializer())
    
        total_step = 0
        for epoch in range(options.epoch):
            print('epoch: ', epoch)
            step = 0
            while step * batchsize < len(content_targets):
                time_start = time.time()

                batch = np.zeros(batch_shape, dtype=np.float32)
                for i, img in enumerate(content_targets[step * batchsize : (step + 1) * batchsize]):
                    batch[i] = read_img(img).astype(np.float32)  # (224, 224, 3)

                step += 1
                total_step += 1

                loss_, _ = sess.run([loss, train_step], feed_dict={inputs: batch})

                time_elapse = time.time() - time_start

                should_save = total_step % 2000 == 0

                if total_step % 1 == 0:  # always true: logs every step
                    print('[step {}] elapsed time: {} loss: {}'.format(total_step, time_elapse, loss_))

                if should_save:                                        
                    print('Saving checkpoint')
                    saver = tf.train.Saver()
                    res = saver.save(sess, save_path)
        
        print('Saving final result to ' + save_path)
        saver = tf.train.Saver()
        res = saver.save(sess, save_path)
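read_img and list_files are assumed by Example #6 but not shown; plausible implementations matching the (224, 224, 3) batches it builds (an assumption, not the original helpers):

import os
import numpy as np
from PIL import Image

def list_files(directory):
    # flat listing of the training images
    return [f for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f))]

def read_img(path, size=(224, 224)):
    img = Image.open(path).convert('RGB').resize(size)
    return np.asarray(img, dtype=np.float32)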
Example #7
def stylyze(options, callback):

    parser = build_parser()
    if options is None:
        key = 'TF_CPP_MIN_LOG_LEVEL'
        if key not in os.environ:
            os.environ[key] = '2'

        options = parser.parse_args()

    if not os.path.isfile(options.network):
        parser.error("Network %s does not exist. (Did you forget to "
                     "download it?)" % options.network)

    if [options.checkpoint_iterations,
            options.checkpoint_output].count(None) == 1:
        parser.error("use either both of checkpoint_output and "
                     "checkpoint_iterations or neither")

    if options.checkpoint_output is not None:
        if re.match(r'^.*(\{.*\}|%.*).*$', options.checkpoint_output) is None:
            parser.error("To save intermediate images, the checkpoint_output "
                         "parameter must contain placeholders (e.g. "
                         "`foo_{}.jpg` or `foo_%d.jpg`")

    content_image_arr = [imread(i) for i in options.content]
    style_images = [imread(style) for style in options.styles]

    width_arr = options.width
    for i in range(len(content_image_arr)):
        width = width_arr[i]
        content_image = content_image_arr[i]
        if width is not None:
            new_shape = (int(
                math.floor(
                    float(content_image.shape[0]) / content_image.shape[1] *
                    width)), width)
            content_image = scipy.misc.imresize(content_image, new_shape)
            content_image_arr[i] = content_image
        target_shape = content_image.shape
        for j in range(len(style_images)):
            style_scale = STYLE_SCALE
            if options.style_scales is not None:
                style_scale = options.style_scales[j]
            style_images[j] = scipy.misc.imresize(
                style_images[j],
                style_scale * target_shape[1] / style_images[j].shape[1])

    style_blend_weights = options.style_blend_weights
    if style_blend_weights is None:
        # default is equal weights
        style_blend_weights = [1.0 / len(style_images) for _ in style_images]
    else:
        total_blend_weight = sum(style_blend_weights)
        style_blend_weights = [
            weight / total_blend_weight for weight in style_blend_weights
        ]

    initial_arr = content_image_arr

    # try saving a dummy image to the output path to make sure that it's writable
    output_arr = options.output
    for output in output_arr:
        if os.path.isfile(output) and not options.overwrite:
            raise IOError("%s already exists, will not replace it without "
                          "the '--overwrite' flag" % output)
        try:
            imsave(output, np.zeros((500, 500, 3)))
        except Exception:
            raise IOError('%s is not writable or does not have a valid file '
                          'extension for an image file' % output)

    vgg_weights, vgg_mean_pixel = vgg.load_net(options.network)

    style_shapes = [(1, ) + style.shape for style in style_images]
    style_features = [{} for _ in style_images]

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= options.style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute style features in feedforward mode
    for i in range(len(style_images)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, options.pooling)
            style_pre = np.array(
                [vgg.preprocess(style_images[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - options.initial_noiseblend

    for i in range(len(content_image_arr)):
        Data.save_step(Data.get_step() + 1)
        loss_arrs = None
        for iteration, image, loss_vals in stylize(
                initial=initial_arr[i],
                content=content_image_arr[i],
                preserve_colors=options.preserve_colors,
                iterations=options.iterations,
                content_weight=options.content_weight,
                content_weight_blend=options.content_weight_blend,
                tv_weight=options.tv_weight,
                learning_rate=options.learning_rate,
                beta1=options.beta1,
                beta2=options.beta2,
                epsilon=options.epsilon,
                pooling=options.pooling,
                initial_content_noise_coeff=initial_content_noise_coeff,
                style_images=style_images,
                style_layers_weights=style_layers_weights,
                style_weight=options.style_weight,
                style_blend_weights=style_blend_weights,
                vgg_weights=vgg_weights,
                vgg_mean_pixel=vgg_mean_pixel,
                style_features=style_features,
                print_iterations=options.print_iterations,
                checkpoint_iterations=options.checkpoint_iterations,
                callback=callback):
            if (image is not None) and (options.checkpoint_output is not None):
                imsave(fmt_imsave(options.checkpoint_output, iteration), image)
            if (loss_vals is not None) \
                    and (options.progress_plot or options.progress_write):
                if loss_arrs is None:
                    itr = []
                    loss_arrs = OrderedDict(
                        (key, []) for key in loss_vals.keys())
                for key, val in loss_vals.items():
                    loss_arrs[key].append(val)
                itr.append(iteration)

        imsave(options.output[i], image)

        if options.progress_write:
            fn = "{}/progress.txt".format(os.path.dirname(options.output[i]))
            tmp = np.empty((len(itr), len(loss_arrs) + 1), dtype=float)
            tmp[:, 0] = np.array(itr)
            for ii, val in enumerate(loss_arrs.values()):
                tmp[:, ii + 1] = np.array(val)
            np.savetxt(fn,
                       tmp,
                       header=' '.join(['itr'] + list(loss_arrs.keys())))

        if options.progress_plot:
            import matplotlib
            matplotlib.use('Agg')
            from matplotlib import pyplot as plt
            fig, ax = plt.subplots()
            for key, val in loss_arrs.items():
                ax.semilogy(itr, val, label=key)
            ax.legend()
            ax.set_xlabel("iterations")
            ax.set_ylabel("loss")
            fig.savefig("{}/progress.png".format(
                os.path.dirname(options.output[i])))
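fmt_imsave builds the checkpoint filename from the {} or %d placeholder that the argument check above enforces; in the neural-style driver it is roughly:

import re

def fmt_imsave(fmt, iteration):
    if re.match(r'^.*\{.*\}.*$', fmt):
        return fmt.format(iteration)
    elif '%' in fmt:
        return fmt % iteration
    else:
        raise ValueError("illegal format string '{}'".format(fmt))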
Example #8
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None,
        print_image_iterations=False):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if (i is not None and i % print_iterations == 0) or last:
                    stderr.write('  content loss: %g\n' % content_loss.eval())
                    stderr.write('    style loss: %g\n' % style_loss.eval())
                    stderr.write('       tv loss: %g\n' % tv_loss.eval())
                    stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)
                if (i % 100 == 0) and print_image_iterations:
                    temp_image = vgg.unprocess(best.reshape(shape[1:]),
                                               mean_pixel)
                    temp_output = 'iteration_' + str(i) + '.jpg'
                    imsave(temp_output, temp_image)
            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
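Several of these examples (e.g. #7 and #8) call imread / imsave without defining them; the originals most likely wrapped scipy.misc, which was removed in SciPy 1.2. A PIL-based equivalent sketch:

import numpy as np
from PIL import Image

def imread(path):
    img = np.array(Image.open(path)).astype(np.float32)
    if len(img.shape) == 2:
        img = np.dstack((img, img, img))  # grayscale -> RGB
    elif img.shape[2] == 4:
        img = img[:, :, :3]               # drop the alpha channel
    return img

def imsave(path, img):
    img = np.clip(img, 0, 255).astype(np.uint8)
    Image.fromarray(img).save(path, quality=95)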
Example #9
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = '0'
    with g.as_default(), g.device('/cpu:0'), tf.Session(config=config) as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session(
                config=config) as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            '''
            Compute the content loss
            
            Variables:
            content_weight: scalar constant we multiply the content_loss by.
            net[content_layer]: features of the current image, Tensor with shape [1, height, width, channels]
            content_features[content_layer]: features of the content image, Tensor with shape [1, height, width, channels]
            '''

            # features of the current image [1, height, width, channels]
            l_content = content_weight * tf.reduce_sum(
                (net[content_layer] - content_features[content_layer])**2)

            content_losses.append(content_layers_weights[content_layer] *
                                  l_content)
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, channels = map(lambda i: i.value,
                                                 layer.get_shape())
                size = height * width * channels
                '''
                Compute the Gram matrix of the layer

                Variables:
                layer: features of the current image at style_layer, Tensor with shape [1, height, width, channels]
                gram: computed gram matrix with shape [channels, channels]
                '''

                feats = tf.reshape(layer, (-1, channels))
                gram = tf.matmul(tf.transpose(feats), feats)
                gram /= size
                '''
                Compute the style loss

                Variables:
                style_layers_weights[style_layer]: scalar constant we multiply the style loss by.
                gram: computed Gram matrix with shape [channels, channels]
                style_gram: computed Gram matrix of the style image at style_layer with shape [channels, channels]
                '''
                style_gram = style_features[i][style_layer]
                l_style = style_layers_weights[style_layer] * tf.reduce_sum(
                    (gram - style_gram)**2)

                style_losses.append(l_style)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        '''
        Compute the TV loss

        Variables:
        tv_weight: scalar giving the weight to use for the TV loss.
        image: tensor of shape (1, H, W, 3) holding the current image.
        '''
        tv_loss = tv_weight * (tf.reduce_sum(
            (image[:, 1:, :, :] - image[:, :-1, :, :])**2) + tf.reduce_sum(
                (image[:, :, 1:, :] - image[:, :, :-1, :])**2))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
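# A minimal sketch of consuming the generator above: `stylize` yields
# (iteration, image) tuples, with iteration None for the final image.
# `stylize_kwargs` and the PIL-based save helper are assumptions for
# illustration, not part of the example.
import numpy as np
from PIL import Image

def save_image(path, img):
    # clip to the displayable range and write with PIL
    Image.fromarray(np.clip(img, 0, 255).astype(np.uint8)).save(path)

# stylize_kwargs would hold the arguments listed in the signature above
for iteration, img in stylize(**stylize_kwargs):
    if iteration is None:
        save_image('output.jpg', img)                       # final image
    else:
        save_image('checkpoint_%04d.jpg' % iteration, img)  # intermediate checkpoint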
Beispiel #10
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # load weights and mean pixel
    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    # set up graph; run session on CPU
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        # image placeholder
        image = tf.placeholder('float', shape=shape)
        # load VGG
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            # evaluate features
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        # create a fresh graph for every style; run session on CPU
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            # image placeholder
            image = tf.placeholder('float', shape=style_shapes[i])
            # load VGG
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                # evaluate features
                features = net[layer].eval(feed_dict={image: style_pre})
                # create the Gram matrix
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        # create the image variable from initial and load the net
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        # add the per-layer content losses together
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                # create the Gram matrix
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization (backprop)
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            # initialize variables to begin optimization
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(iteration_times[-10:]) * (iterations - i)
                    stderr.write('Iteration %4d/%4d (%s elapsed, %s remaining)\n' % (
                        i + 1,
                        iterations,
                        hms(elapsed),
                        hms(remaining)
                    ))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    # keep the image with the lowest loss seen so far
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
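# A hedged NumPy sketch of the Gram-matrix step the skeleton above asks for,
# using toy shapes rather than real VGG activations:
import numpy as np

features = np.random.rand(1, 32, 32, 64).astype(np.float32)  # toy (1, H, W, C) activation
flat = features.reshape(-1, features.shape[3])               # (H*W, C)
gram = np.matmul(flat.T, flat) / features.size               # (C, C), same normalization as above
assert gram.shape == (64, 64)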
Beispiel #11
0
def stylize(network, initial, content, styles, iterations, content_weight, style_weight, style_blend_weights, tv_weight, learning_rate, print_iterations=None, checkpoint_iterations=None):

	# input.shape = (n_image, height, width, channel)
	content_shape = (1, ) + content.shape
	# style.shape = [ all style shapes ]
	style_shapes = [(1, ) + style.shape for style in styles]
	content_features = {}
	style_features = [{} for _ in styles] # for multiple style image inputs

	# compute content features
	g = tf.Graph()
	with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
		content_pl = tf.placeholder('float', shape=content_shape)
		# compute feedforward activations
		# net is the activation of image using Placeholder
		activation, mean_pixel = vgg.net(network, content_pl)

		# preprocessing the input
		content_preprocessed = np.array([vgg.preprocess(content, mean_pixel)])

		# extract content features using preprocessed input into the VGG
		# we only extract content features from one layer
		content_features[CONTENT_LAYER] = activation[CONTENT_LAYER].eval(
			feed_dict={content_pl: content_preprocessed})


	# compute style features
	# the loop below is for multiple style image inputs
	for i in range(len(styles)):
		g = tf.Graph()
		with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
			# since different style layers have different shapes
			style_pl = tf.placeholder('float', shape=style_shapes[i])
# question: why do we reuse the mean_pixel obtained from the content pass?
			activation, _ = vgg.net(network, style_pl)
			style_preprocessed = np.array([vgg.preprocess(styles[i], mean_pixel)])
			# since we will compute multiple layers of styles, we use loop
			for layer in STYLE_LAYERS:
				# extract the one of the style features from one layer 
				_features = activation[layer].eval(feed_dict={style_pl: style_preprocessed})
				# since we will compute the Gram Matrix, we will reshape the output
				# so that the inner product is easier to compute
# question: why should we reshape? what is the original shape? what does -1 mean?
				_features = _features.reshape((-1, _features.shape[3]))
				# compute the Gram Matrix as style features
# question: why divide by _features.size?
				gram = np.matmul(_features.T, _features) / _features.size
				style_features[i][layer] = gram  # the first index is the n_th style image input

	# compute back-prop
	with tf.Graph().as_default():
		# initial = None means this iteration is our first iteration
		# thus we need to generate a white noise image
		if initial is None:
# note: the noise turns out not to be used at all
			white_noise_image = np.random.normal(size=content_shape, scale=np.std(content) * .1)
			initial = tf.random_normal(content_shape) * .256
		# if we already have an image in training
		# we will keep using this image for further modification
		else:
			initial_preprocessed = np.array([vgg.preprocess(initial, mean_pixel)])
			initial = initial_preprocessed.astype('float32')
		# we make this initial input as a trainable variable
		image = tf.Variable(initial)
		activation, _ = vgg.net(network, image)

		# compute content loss
		image_content_features = activation[CONTENT_LAYER]
		target_content_features = content_features[CONTENT_LAYER]
# question: why divide by target.size? can we eliminate that?
		# the content weight is included here rather than the end
		content_loss = content_weight * .5 * 1 / target_content_features.size * tf.nn.l2_loss(image_content_features - target_content_features)

		# compute style loss
		# using loop to sum style loss for multiple style image inputs
		style_loss_for_all_styles = 0
		for i in range(len(styles)):
			style_losses = []  # the total losses
			# using loop to sum style loss for multiple style layers
			for style_layer in STYLE_LAYERS:
				layer_activation = activation[style_layer]
				_, height, width, channel = map(
					lambda i: i.value, layer_activation.get_shape())
				layer_size = height * width * channel
				feats = tf.reshape(layer_activation, (-1, channel))
# note: dividing by size is a normalization choice, not strictly required
				image_style_gram = tf.matmul(tf.transpose(feats), feats) / layer_size
				target_style_gram = style_features[i][style_layer]
				layer_style_loss = 2 / target_style_gram.size * tf.nn.l2_loss(image_style_gram - target_style_gram)
				style_losses.append(layer_style_loss)
			style_loss_for_all_styles += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

		# total variation denoising
		# this loss regularizes the output image so that neighboring
		# pixels change smoothly; it penalizes neighbor differences
		# (see the NumPy check after this function)
		tv_y_size = _tensor_size(image[:, 1:, :, :])
		tv_x_size = _tensor_size(image[:, :, 1:, :])
		tv_loss = tv_weight * 2 * (
			(tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :content_shape[1] - 1, :, :]) / tv_y_size) +
			(tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :content_shape[2] - 1, :]) / tv_x_size))

		# overall loss
		loss = content_loss + style_loss_for_all_styles + tv_loss

		# optimizer
		train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

		def print_progress(i, last=False):
			stderr.write('Iteration %d/%d' % (i + 1, iterations))
			if last or (print_iterations and i % print_iterations == 0):
				stderr.write('  content loss: %g\n' % content_loss.eval())
				stderr.write('    style loss: %g\n' % style_loss_for_all_styles.eval())
				stderr.write('       tv loss: %g\n' % tv_loss.eval())
				stderr.write('    total loss: %g\n' % loss.eval())

		# optimization
		best_loss = float('inf')  # all losses will be lower than initial
		best = None
		total_initial_time = datetime.now().replace(microsecond=0)
		with tf.Session() as sess:
			sess.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated
			initial_time = datetime.now().replace(microsecond=0)
			for i in range(iterations):
				now_time = datetime.now().replace(microsecond=0)
				last_step = (i == iterations - 1)
				print_progress(i, last=last_step)
				stderr.write(' Training Time %s  Elapsed Time %s\n' % (str(now_time - initial_time), str(now_time - total_initial_time)))
				initial_time = now_time
				train_step.run()

				# when checkpoint_iterations is not None
				# and when iter idx fulfills it
				# or when it comes to the last step
				if(checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
					this_loss = loss.eval()
					if this_loss < best_loss:
						best_loss = this_loss
						# image is a tf.Variable; eval() returns its
						# current value as a NumPy array
						best = image.eval()
# note: yield makes this function a generator
					# content_shape[1:] drops shape[0] (the number of
					# images), leaving (height, width, channels)
					yield(
						(None if last_step else i),
						(vgg.unprocess(best.reshape(content_shape[1:]), mean_pixel)))
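# The total-variation term used throughout these examples reduces to a mean of
# squared neighbor differences, since tf.nn.l2_loss(t) = sum(t ** 2) / 2.
# A NumPy check on a toy image (tv_weight taken as 1):
import numpy as np

img = np.random.rand(1, 8, 8, 3)
dy = img[:, 1:, :, :] - img[:, :-1, :, :]   # vertical neighbor differences
dx = img[:, :, 1:, :] - img[:, :, :-1, :]   # horizontal neighbor differences
tv = (dy ** 2).sum() / dy.size + (dx ** 2).sum() / dx.size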
def train(content_targets,
          style_target,
          content_weight,
          style_weight,
          tv_weight,
          vgg_path,
          epochs=2,
          print_iterations=1000,
          batch_size=4,
          learning_rate=1e-3,
          save_path='model/style.ckpt'):
    # drop trailing training images that do not fill a full batch
    mod = len(content_targets) % batch_size
    if mod > 0:
        content_targets = content_targets[:-mod]

    style_features = {}
    # training images are 320x320x3, in TensorFlow NHWC layout
    batch_shape = (batch_size, 320, 320, 3)
    style_shape = (1, ) + style_target.shape

    # load the pretrained VGGNet model
    weights, mean_pixel = vgg.load_net(vgg_path)

    with tf.Graph().as_default(), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        # yes, really: the (placeholder) image has the mean pixel subtracted
        style_image_pre = vgg.preprocess(style_image, mean_pixel)
        net = vgg.net(weights, style_image_pre)
        # wrap the style image into a batch array
        style_pre = np.array([style_target])
        for layer in STYLE_LAYER:
            # evaluate this layer's activations
            features = net[layer].eval(feed_dict={style_image: style_pre})
            # one axis becomes this layer's filter count (see the paper)
            features = np.reshape(features, (-1, features.shape[3]))
            # Gram matrix: A'A (see the paper)
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        x_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name='x_content')
        x_pre = vgg.preprocess(x_content, mean_pixel)

        content_features = {}
        content_net = vgg.net(weights, x_pre)
        # as above, extract the required layer
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        # run the residual (image transform) network
        preds = residual.net(x_content / 255.0)
        preds_pre = vgg.preprocess(preds, mean_pixel)
        net = vgg.net(weights, preds_pre)
        # account for every example in the batch
        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        # difference between features with and without the residual network
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        # difference between the transformed images and the style image
        style_losses = []
        for style_layer in STYLE_LAYER:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            # Gram matrix: A'A (see the paper)
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)
        style_loss = style_weight * functools.reduce(tf.add,
                                                     style_losses) / batch_size

        # image denoising: total variation
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        # final loss function
        loss = content_loss + style_loss + tv_loss

        # start the training process
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            start_time = time.time()
            # each epoch runs once over every image in the training set
            while iterations * batch_size < num_examples:
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = helper.read_img(img_p, (320, 320, 3)).astype(
                        np.float32)

                iterations += 1
                # make sure each batch is complete
                assert X_batch.shape[0] == batch_size
                feed_dict = {x_content: X_batch}

                # train on this batch
                train_step.run(feed_dict=feed_dict)

                # print training progress every few iterations
                is_print_iter = int(iterations) % print_iterations == 0
                # whether this is the final step of the final epoch
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples

                # print the losses
                should_print = is_print_iter or is_last
                if should_print:
                    current_time = time.time()
                    delta_time = current_time - start_time
                    start_time = current_time

                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {x_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup

                    losses = (_style_loss, _content_loss, _tv_loss, _loss)

                    saver = tf.train.Saver()
                    res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch, delta_time)
Beispiel #13
0
def optimize(content_targets, style_target, content_weight, style_weight,
        tv_weight, vgg_path, epochs=2, print_iterations=1000, batch_size=4,
        checkpoint_dir='saver/fns.ckpt', summary_dir='summary/', learning_rate=1e-3):
    """
    Calculate the total loss and optimize the network.

    Args:
        content_targets: The content image.
        style_target: The style image.
        content_weight: Weight for content loss.
        style_weight: Weight for style loss.
        tv_weight: Weight for total variation.
        vgg_path: Path of the vgg network.
        epochs: Number of epochs for training. Default: 2.
        print_iterations: Interval for printing the training loss. Default: 1000.
        batch_size: Default: 4.
        checkpoint_dir: Path to save the checkpoint.
        summary_dir: Path to save summaries.
        learning_rate: Default: 1e-3.

    Returns:
        Yield the prediction, losses, iteration and epoch
    """
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod] # discard the remaining
    
    batch_shape = (batch_size, 256, 256, 3)

    # precompute style features
    style_features = _style_features(style_target, vgg_path)
    
    X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
    X_pre = vgg.preprocess(X_content)

    # compute content features
    content_features = {}
    content_net = vgg.net(vgg_path, X_pre)
    content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]
        
    # content is the input for both the transform network and the loss
    # network
    preds = transform.net(X_content/255.0)
    preds_pre = vgg.preprocess(preds)    
    net = vgg.net(vgg_path, preds_pre)

    # compute loss
    content_loss = _content_loss(content_weight, net, content_features, batch_size)
    style_loss = _style_loss(style_weight, net, style_features)
    tv_loss = _tv_loss(tv_weight, preds, batch_shape)
    loss = content_loss + style_loss + tv_loss
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
    # summary for tensorboard
    tf.summary.scalar("content loss", content_loss)
    tf.summary.scalar("style loss", style_loss)
    tf.summary.scalar("tv loss", tv_loss)
    tf.summary.scalar("total loss", loss)
    summary_op = tf.summary.merge_all()
    writer = tf.summary.FileWriter(summary_dir, graph=tf.get_default_graph())
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256, 256, 3)).astype(np.float32) #resize content image

                iterations += 1
                assert X_batch.shape[0] == batch_size
                
                feed_dict = {
                        X_content : X_batch 
                        }
                #train_step.run(feed_dict = feed_dict)
                summary, _ = sess.run([summary_op, train_step], feed_dict = feed_dict)

                is_print_iter = int(iterations) % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content:X_batch}
                    tup = sess.run(to_get, feed_dict = test_feed_dict)
                    style_loss_p, content_loss_p, tv_loss_p,loss_p, preds_p = tup
                    losses = (style_loss_p, content_loss_p, tv_loss_p, loss_p)
                    saver = tf.train.Saver(max_to_keep = 5)
                    res = saver.save(sess, checkpoint_dir, iterations)
                    yield(preds_p, losses, iterations, epoch)
                
                if int(iterations) % 20 == 0:
                    writer.add_summary(summary, iterations)  # include a step so TensorBoard orders the points
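# A hedged sketch of restoring the checkpoint written by `optimize` above.
# It assumes the same graph has been rebuilt in the default graph first; the
# directory matches the default checkpoint_dir='saver/fns.ckpt'.
import tensorflow as tf

with tf.Session() as sess:
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint('saver/')  # directory holding the ckpt files
    saver.restore(sess, ckpt)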
Beispiel #14
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                # kernel_s (the patch size) is presumably defined at module level
                features_bank = sk_image.extract_patches_2d(np.squeeze(features), (kernel_s, kernel_s))
                style_features[i][layer] = [features_bank,features]

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
#            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
       

        # content loss
        content_layers_weights = {}
#        content_layers_weights['relu4_2'] = content_weight_blend
#        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend
        content_layers_weights['relu4_2'] = 0.5
        content_layers_weights['relu5_2'] = 0.5
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:              
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                
                # Calculate normalized layer
                layer = tf.expand_dims(net[style_layer],axis=4)
                paddings = [[0, 0], [1,1], [1,1], [0, 0],[0,0]]
                layer_depth = layer.get_shape().as_list()[3]
                layer_pad = tf.pad(layer, paddings, "CONSTANT")
                layer_norm = tf.sqrt(tf.nn.conv3d(tf.pow(layer_pad,2),tf.ones((kernel_s,kernel_s,layer_depth,1,1),dtype=tf.float32),strides=[1, 1, 1, 1, 1],padding='VALID'))
                
                # Calculate normalized filter bank
                style_filters = np.transpose(style_features[i][style_layer][0],(1,2,3,0))
                style_filters = np.expand_dims(style_filters,axis=3)
                style_filters_norm = np.sqrt(np.sum(np.power(style_filters,2),axis=(0,1,2)))
                style_filters_normalized = style_filters/style_filters_norm
                
                # Calculate normalized correlations
                layer_filtered = tf.nn.conv3d(layer_pad,style_filters_normalized,strides=[1, 1, 1, 1, 1],padding='VALID')/layer_norm
                
                # Find maximum response and index into the filters
                max_filter_response_idx = tf.squeeze(tf.argmax(layer_filtered,axis=4))
#                max_filter_response_idx = tf.squeeze(tf.argmax(tf.abs(layer_filtered),axis=4))
                max_filter_response_idx = tf.reshape(max_filter_response_idx,[-1])
                max_filter_response_weight = tf.squeeze(tf.reduce_max(tf.abs(layer_filtered),axis=4))
                max_filter_response_weight = tf.reshape(max_filter_response_weight,[-1])
                max_filter_response_weight = max_filter_response_weight/tf.reduce_max(max_filter_response_weight)
                
                style_filters_tf = tf.transpose(tf.squeeze(tf.convert_to_tensor(style_filters, np.float32)),(3,0,1,2))
                style_filters_tf_gathered = tf.gather(style_filters_tf,max_filter_response_idx)
                style_filters_tf_gathered = tf.reshape(style_filters_tf_gathered,(style_filters_tf_gathered.get_shape().as_list()[0], -1))
                layer_patches = tf.extract_image_patches(tf.squeeze(layer_pad,axis=4),
                                        [1,kernel_s,kernel_s,1],
                                        [1,1,1,1],
                                        [1,1,1,1],
                                        padding="VALID")
                layer_size = tf.shape(layer_patches)
                layer_patches = tf.reshape(layer_patches,(-1, layer_size[3]))
                style_norm = tf.cast(layer_size[1]*layer_size[2]*layer_size[3],dtype=tf.float32)

#                gram1 = tf.matmul(tf.transpose(layer_patches), layer_patches) / style_norm
#                gram2 = tf.matmul(tf.transpose(style_filters_tf_gathered), style_filters_tf_gathered) / style_norm
#                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram1- gram2))
                
                loss_ = tf.reduce_mean(tf.reduce_mean(tf.pow(layer_patches-style_filters_tf_gathered, 2),axis=1)*tf.stop_gradient(max_filter_response_weight))
                style_losses.append(style_layers_weights[style_layer] * 2 * loss_)
                
                
                
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
#                print(str(max_filter_response_weight.eval()))
#                print(' ')
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
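# Beispiel #14 swaps the Gram loss for a patch-matching style loss: each patch
# of the current layer is matched to its most correlated style patch and pulled
# toward it. A hedged NumPy sketch of that idea with toy shapes:
import numpy as np

layer_patches = np.random.rand(100, 3 * 3 * 64)  # (n_patches, k*k*C) from the image layer
style_patches = np.random.rand(250, 3 * 3 * 64)  # patch bank from the style layer

l_norm = layer_patches / np.linalg.norm(layer_patches, axis=1, keepdims=True)
s_norm = style_patches / np.linalg.norm(style_patches, axis=1, keepdims=True)
best = np.matmul(l_norm, s_norm.T).argmax(axis=1)  # best-matching style patch per image patch
loss = ((layer_patches - style_patches[best]) ** 2).mean()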
Beispiel #15
0
    G_sample)  # G_sample = generated dataset (fake)
# D_real & D_fake = unused   (D_fake = probability G fools D)
""" Feature Loss """
#VGG
#content = imread('abbeyexample_copy.png')/256
#content = gray2rgb(rgb2gray(content))
shape = (1, 256, 256, 3)
pooling = 'avg'
CONTENT_LAYERS = ('relu4_2', 'relu5_2')
network = 'imagenet-vgg-verydeep-19.mat'
vgg_weights, vgg_mean_pixel = vgg.load_net(network)
print(5)
orig_image = tf.placeholder(
    'float', shape=shape)  #need to feed it with (1,256,256,3) objects
print(orig_image)
orig_content = vgg.preprocess(orig_image,
                              vgg_mean_pixel)  #tensor (1,256,256,3)
print(orig_content)
print('G_sample.shape', G_sample.shape)
G_sample_dim = G_sample
G_sample = tf.reshape(G_sample, (256, 256))
G_sample = tf.stack([G_sample, G_sample, G_sample],
                    axis=2)  #tensor (256,256,3)
print('G_sample.shape', G_sample.shape)
gen_content = vgg.preprocess(G_sample, vgg_mean_pixel)
gen_content = tf.expand_dims(gen_content, 0)
print('ok')
orig_net = vgg.net_preloaded(vgg_weights, orig_content, pooling)
print('ok1')
gen_net = vgg.net_preloaded(vgg_weights, gen_content, pooling)
#content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
#print('content_pre.shape',content_pre.shape)
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod] 

    style_features = {}

    batch_shape = (batch_size,256,256,3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image:style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content/255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER])*batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i:i.value,layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0,2,1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                   X_batch[j] = get_img(img_p, (256,256,3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {
                   X_content:X_batch
                }

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                       X_content:X_batch
                    }

                    tup = sess.run(to_get, feed_dict = test_feed_dict)
                    _style_loss,_content_loss,_tv_loss,_loss,_preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                       _preds = vgg.unprocess(_preds)
                    else:
                       saver = tf.train.Saver()
                       res = saver.save(sess, save_path)
                    yield(_preds, losses, iterations, epoch)
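# The training loops above rely on a get_img helper to load and resize content
# images; a hedged PIL-based stand-in (not the original utility):
import numpy as np
from PIL import Image

def get_img(path, shape=(256, 256, 3)):
    # PIL's resize takes (width, height)
    img = Image.open(path).convert('RGB').resize((shape[1], shape[0]))
    return np.asarray(img, dtype=np.float32)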
Beispiel #17
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape   # e.g. if content.shape == (356, 600, 3), then shape == (1, 356, 600, 3)
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}                     # dict for the content feature maps
    style_features = [{} for _ in styles]     # one dict of style feature maps per style image

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)  # load the pretrained model to get the weights and mean pixel

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp     # if style_layer_weight_exp is set, the layer weights grow
                                                   # exponentially; the default of 1 keeps them constant

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum   # rescale the weights into proportions summing to 1

    # first create a placeholder for the image, then feed content_pre to it via
    # eval()'s feed_dict to run the net and obtain the content feature maps
    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.compat.v1.Session() as sess:   # compute content features
        image = tf.compat.v1.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)        # the whole network is built here; net holds the content feature maps
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])  #content - vgg_mean_pixel
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre}) # fill in content_features
            # print(layer,content_features[layer].shape)

    # compute style features in feedforward mode
    for i in range(len(styles)):                     # compute style features
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.compat.v1.Session() as sess:
            image = tf.compat.v1.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)       # pooling defaults to max
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])  #styles[i]-vgg_mean_pixel
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))   # reshape by channel count
                gram = np.matmul(features.T, features) / features.size  # Gram matrix
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropogation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256            # initialize the image
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + ( tf.random.normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        '''
        image = tf.Variable(initial) initializes a TensorFlow variable, which is
        the object being trained. Note that what we train here is an image, not
        weights and biases.
        '''
        net = vgg.net_preloaded(vgg_weights, image, pooling)   # here net holds the feature maps of the generated image

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend        # content weight blend: conv4_2 * blend + conv5_2 * (1 - blend)
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend  # content_weight_blend defaults to 1, i.e. only conv4_2 is used

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /         # generated image - content image
                    content_features[content_layer].size))     # tf.nn.l2_loss: output = sum(t ** 2) / 2
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        '''
        Since multiple style images can be supplied, a for loop is used here. As
        before, style_pre is fed to the image placeholder to run the net and
        obtain the style feature maps. Because style is the inner product of
        different filter responses, one extra step is added:
        gram = np.matmul(features.T, features) / features.size, which is the
        style feature.
        '''
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size   # Gram matrix of the generated image
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        '''
        The content loss and style loss above follow the formulas in the paper;
        the code additionally applies total variation denoising, so the total
        loss = content_loss + style_loss + tv_loss.
        '''
        loss = content_loss + style_loss + tv_loss     # total loss is the sum of the three losses

        # optimizer setup
        # create train_step with the Adam optimizer to minimize the loss above;
        # iterating train_step drives the loss down, and the best image found
        # becomes the final result
        train_step = tf.compat.v1.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)   # undo the preprocessing

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale accoriding to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (             # like return, but yields from a generator
                        (None if last_step else i),
                        img_out
                    )
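The style term above compares Gram matrices of VGG activations. As a minimal
NumPy restatement of that computation (the helper name gram_matrix is ours,
not from the example), under the same normalization:

import numpy as np

def gram_matrix(features):
    # features: one VGG activation map, shape (1, height, width, channels)
    channels = features.shape[-1]
    feats = features.reshape(-1, channels)          # (H*W, C)
    return np.matmul(feats.T, feats) / feats.size   # (C, C)

Dividing by feats.size matches the normalization used when the style
features are precomputed above.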
Beispiel #18
0
def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            grammatrix = np.matmul(features.T, features)
            style_features[layer] = grammatrix

    g = tf.Graph()
    with g.as_default():
        global_step = tf.Variable(0, trainable=False)
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        content_pre = vgg.preprocess(content_image, mean_pixel)
        init = content_pre * (1 - NOISE_RATIO) + noise * NOISE_RATIO
        init = init.astype('float32')
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats)

            style_gram = style_features[i]

            style_losses.append(tf.nn.l2_loss(gram - style_gram) /
                    (4.0 * number ** 2 * (height * width) ** 2))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        loss = ALPHA * content_loss + BETA * style_loss

        learning_rate = tf.train.exponential_decay(LEARNING_RATE_INITIAL,
                global_step, LEARNING_DECAY_STEPS, LEARNING_DECAY_BASE,
                staircase=True)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss,
                global_step=global_step)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(100000):
                print('i = %d' % i)
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()
                VGG_PATH)  # convert the tf.nn.conv2d to slim format of vgg
        decoder = Decoder(mode='test', weights_path=DECODER_PATH)

        content_input = tf.placeholder(tf.float32,
                                       shape=(1, None, None, 3),
                                       name='content_input')
        style_input = tf.placeholder(tf.float32,
                                     shape=(1, None, None, 3),
                                     name='style_input')

        # switch RGB to BGR
        content = tf.reverse(content_input, axis=[-1])
        style = tf.reverse(style_input, axis=[-1])

        # preprocess image
        content = vgg.preprocess(content)
        style = vgg.preprocess(style)

        encoder_content, encoder_content_points = vgg.vgg_19(
            content, reuse=False, final_endpoint="conv4_1")
        encoder_style, encoder_style_points = vgg.vgg_19(
            style, reuse=True, final_endpoint="conv4_1")

        # pass the encoded images to AdaIN
        target_features = AdaIN(encoder_content, encoder_style)

        # decode target features back to image
        with tf.variable_scope("decoder_target"):
            #alpha = 0.8
            #target_features=(1-alpha)*encoder_content+alpha*target_features #content-style trade-off
            generated_img = decoder.decode(target_features)
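The fragment above routes both encodings through AdaIN, whose implementation
is not shown here. A sketch of the usual adaptive instance normalization
(Huang & Belongie, 2017) for NHWC feature maps, as an assumption about what
that helper does:

import tensorflow as tf

def AdaIN(content_feat, style_feat, eps=1e-5):
    # align the per-channel mean/variance of the content features with
    # those of the style features (statistics taken over H and W)
    c_mean, c_var = tf.nn.moments(content_feat, axes=[1, 2], keep_dims=True)
    s_mean, s_var = tf.nn.moments(style_feat, axes=[1, 2], keep_dims=True)
    normalized = (content_feat - c_mean) / tf.sqrt(c_var + eps)
    return normalized * tf.sqrt(s_var + eps) + s_mean

The commented-out lines above then blend encoder_content back in with a
weight alpha for a content-style trade-off.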
Beispiel #20
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image);
    `iteration` is None if this is the final image (the last iteration).
    Otherwise tuples are yielded every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """

    # The shape annotations in the comments below are based on the content image
    # 1-content.jpg with shape (533, 400, 3) and the style image 1-style.jpg
    # with shape (316, 400, 3). They will differ for other input images.

    shape = (1,) + content.shape                                # (1, 533, 400, 3)
    style_shapes = [(1,) + style.shape for style in styles]     # (1, 316, 400, 3)
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)         # Load the VGG-19 model.
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight        # {'relu1_1': 1.0, 'relu2_1': 1.0, 'relu3_1': 1.0, 'relu4_1': 1.0, 'relu5_1': 1.0}
        layer_weight *= style_layer_weight_exp                  # 1.0

    # VGG19 layers:
    # 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    # 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    # 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    # 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
    # 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4'

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:                            # ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
        layer_weights_sum += style_layers_weights[style_layer]  # 5.0
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum  # {'relu1_1': 0.2, 'relu2_1': 0.2, 'relu3_1': 0.2, 'relu4_1': 0.2, 'relu5_1': 0.2}
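    # e.g. with style_layer_weight_exp = 0.5 the raw weights would be
    # (1.0, 0.5, 0.25, 0.125, 0.0625); after this normalization they sum to 1
    # (approximately 0.516, 0.258, 0.129, 0.065, 0.032).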

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)              # {'conv1_1': Tensor..., relu1_1: Tensor...}
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)]) # (1, 533, 400, 3) subtract with the mean pixel
        for layer in CONTENT_LAYERS:                                                  # (relu4_2, relu5_2)
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre}) # Find the feature values for (relu4_2, relu5_2)

    # compute style features in feed forward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])            # (1, 316, 400, 3)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:                                        # ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
                features = net[layer].eval(feed_dict={image: style_pre})      # For relu1_1 layer (1, 316, 400, 64)
                features = np.reshape(features, (-1, features.shape[3]))      # (126400, 64)
                gram = np.matmul(features.T, features) / features.size        # (64, 64) Gram matrix - measure the dependency of features.
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend                    # 0

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1) # Generate a random image with SD the same as the content image.
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:       # ('relu4_2', 'relu5_2')
            # Use MSE as the content loss
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)
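        # Note: since tf.nn.l2_loss(t) = sum(t**2) / 2, the expression
        # 2 * tf.nn.l2_loss(x) / x.size above is exactly mean(x**2), the MSE.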

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]            # For relu1_1: (1, 533, 400, 64)
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))                # (213200, 64)
                gram = tf.matmul(tf.transpose(feats), feats) / size    # Gram matrix for the features in relu1_1 for the result image.
                style_gram = style_features[i][style_layer]            # Gram matrix for the style
                # Style loss is the MSE for the difference of the 2 Gram matrix
                style_losses.append(style_layers_weights[style_layer]
                                    * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # Total variation denoising: add a cost that penalizes neighboring
        # pixels that differ strongly. This helps to reduce noise.
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )
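The color-preservation branch above keeps the stylized luma and the original
chroma. Folded into a standalone helper (the name transfer_luminosity is
ours; steps 1-2 collapse into the YCbCr conversion because PIL's Y channel
is the same Rec.601 luma):

import numpy as np
from PIL import Image

def transfer_luminosity(styled_rgb, original_rgb):
    # keep the stylized luma (Y) but the original chroma (Cb, Cr)
    styled_yuv = np.array(Image.fromarray(
        styled_rgb.astype(np.uint8)).convert('YCbCr'))
    original_yuv = np.array(Image.fromarray(
        original_rgb.astype(np.uint8)).convert('YCbCr'))
    combined = original_yuv.copy()
    combined[..., 0] = styled_yuv[..., 0]
    return np.array(Image.fromarray(combined, 'YCbCr').convert('RGB'))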
Beispiel #21
0
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:', features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                #mask = np.zeros_like(features)
                #mask[:49664/2, :] = 1
                #print 'Mask shape', mask.shape
                print('Final features shape', features.shape)
                #features = features*mask
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape: ', gram.shape)
                style_features[i][layer] = gram

    #sys.exit()
    # make stylized image using backpropogation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print('Height, width, number', height, width, number)
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                
                #print tf.shape(feats).as_list()

                if normal_flag == 0:
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('bottle_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:,d] = maskt
                    print('Mask shape', mask.shape)
                    #print sum(sum(mask == 1)) + sum(sum(mask == 0))
                    #mask[:height*width/2, :] = 1
                    if i == 0:
                        mask = tf.constant(mask)
                        feats = tf.multiply(feats, mask)

                        gram = tf.matmul(tf.transpose(feats), feats) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                    else:
                        # invert the mask; cast to float32 so tf.multiply dtypes match
                        mask2 = (mask < 1).astype(np.float32)
                        feats2 = tf.multiply(feats, mask2)
                        gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)

                else:
                    feats2 = feats
                    gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)



            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        if normal_flag != 0:
            print("general mask :")
            mask = np.zeros((height*width, number), dtype=np.float32)
            maskt = np.reshape(imread('bottle_mask.jpg').astype(np.float32), (height*width,))
            maskt = maskt > 100
            # for d in range(3):
            #     mask[:,d] = maskt
            print('Mask shape', maskt.shape)
            maskt = maskt.reshape((height,width))

            maskt = np.array([maskt,maskt,maskt])
            maskt = maskt.transpose((1,2,0))
            mask = tf.constant(maskt, dtype=tf.float32)
            # feats = tf.multiply(feats, mask)

        def capper(a, b, mask):
            # a: gradient w.r.t. the image variable, shape (1, H, W, 3)
            # b: the corresponding variable; gradients are zeroed outside the mask
            print("orig shape", a)
            reshaped_in_grad = tf.reshape(a, [-1])
            print("reshaped grad", reshaped_in_grad)
            print("mask", mask)
            g = tf.multiply(a, mask)
            # g = tf.reshape(g, (1,height,width,3))
            return g, b


        # optimizer setup
        # train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        # Create an optimizer.
        train_step = tf.train.GradientDescentOptimizer(learning_rate)
        # Compute the gradients for a list of variables.
        grads_and_vars = train_step.compute_gradients(loss)
        # grads_and_vars is a list of tuples (gradient, variable). Do whatever
        # you need to the 'gradient' part, e.g. cap (here: mask) them.
        capped_grads_and_vars = [capper(gv[0], gv[1], mask) for gv in grads_and_vars]
        # Ask the optimizer to apply the capped gradients.
        train_step = train_step.apply_gradients(capped_grads_and_vars)

        # opt_op = opt.minimize(cost, var_list=<list of variables>)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if (i is not None and i % print_iterations == 0) or last:
                    stderr.write('  content loss: %g\n' % content_loss.eval())
                    stderr.write('    style loss: %g\n' % style_loss.eval())
                    stderr.write('       tv loss: %g\n' % tv_loss.eval())
                    stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))

                train_step.run()

                # print("runningstep: ", i, running_step)
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)

                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)

            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
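The distinguishing trick in this example is masking gradients between
compute_gradients and apply_gradients so that only the pixels selected by
the mask get updated. Stripped of the debugging output, the pattern is (a
sketch; image_mask is assumed to broadcast against the image gradient):

import tensorflow as tf

def masked_train_step(loss, image_mask, learning_rate):
    # zero the gradient wherever image_mask is 0, freezing those pixels
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = opt.compute_gradients(loss)
    masked = [(tf.multiply(g, image_mask), v) for g, v in grads_and_vars]
    return opt.apply_gradients(masked)

This works here because the image itself is the only trainable variable, so
every gradient has the image's shape.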
Beispiel #22
0
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            preserve_colors,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image, loss_vals) at every
    iteration. `image` and `loss_vals` are None by default; every
    `checkpoint_iterations` iterations `image` is not None, and every
    `print_iterations` iterations `loss_vals` is not None.

    `loss_vals` is a dict with loss values for the current iteration, e.g.
    ``{'content': 1.23, 'style': 4.56, 'tv': 7.89, 'total': 13.68}``.

    :rtype: iterator[tuple[int, image|None, dict|None]]
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # total loss
        loss = content_loss + style_loss + tv_loss

        # We use OrderedDict to make sure we have the same order of loss types
        # (content, style, tv, total) as defined by the initial construction of
        # the loss_store dict. This is important for print_progress() and
        # saving loss_arrs (column order) in the main script.
        #
        # Subtle Gotcha (tested with Python 3.5): The syntax
        # OrderedDict(key1=val1, key2=val2, ...) does /not/ create the same
        # order since, apparently, it first creates a normal dict with random
        # order (< Python 3.7) and then wraps that in an OrderedDict. We have
        # to pass in a data structure which is already ordered. I'd call this a
        # bug, since both constructor syntax variants result in different
        # objects. In 3.6, the order is preserved in dict() in CPython, in 3.7
        # they finally made it part of the language spec. Thank you!
        loss_store = OrderedDict([('content', content_loss),
                                  ('style', style_loss), ('tv', tv_loss),
                                  ('total', loss)])
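        # For example, on CPython 3.5:
        #   list(OrderedDict(a=1, b=2).keys())             # order not guaranteed
        #   list(OrderedDict([('a', 1), ('b', 2)]).keys()) # always ['a', 'b']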

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Optimization started...')
            if (print_iterations and print_iterations != 0):
                print_progress(get_loss_vals(loss_store))
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(
                        iteration_times[-10:]) * (iterations - i)
                    print('Iteration %4d/%4d (%s elapsed, %s remaining)' %
                          (i + 1, iterations, hms(elapsed), hms(remaining)))
                else:
                    print('Iteration %4d/%4d' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    loss_vals = get_loss_vals(loss_store)
                    print_progress(loss_vals)
                else:
                    loss_vals = None

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5

                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))
                        if i % 2 == 0:
                            # save an intermediate result at this checkpoint
                            Image.fromarray(img_out).save(
                                'output' + str(i) + '.jpg', quality=95)

                else:
                    img_out = None

                yield i + 1 if last_step else i, img_out, loss_vals

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
Beispiel #23
0
def inferenceImg(network, initial_img, initial_noiseblend, content, style,
                 preserve_colors, iterations, content_weight,
                 content_weight_blend, style_weight, style_layer_weight_exp,
                 style_blend_weight, tv_weight, learning_rate, beta1, beta2,
                 epsilon, pooling, print_iterations, checkpoint_iterations):

    content_shape = (1, ) + content.shape
    style_shape = (1, ) + style.shape

    content_features = {}
    style_features = {}

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight = layer_weight * style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum = layer_weights_sum + style_layers_weights[
            style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = style_layers_weights[
            style_layer] / layer_weights_sum

    # compute content features in feedforward mode
    g1 = tf.Graph()
    with g1.as_default(), g1.device('/cpu:0'), tf.Session() as sess:
        contentImg = tf.placeholder('float', shape=content_shape)
        net = vgg.net_preloaded(vgg_weights, contentImg, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={contentImg: content_pre})

    # compute style features in feedforward mode
    g2 = tf.Graph()
    with g2.as_default(), g2.device('/cpu:0'), tf.Session() as sess:
        styleImg = tf.placeholder('float', shape=style_shape)
        net = vgg.net_preloaded(vgg_weights, styleImg, pooling)
        style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={styleImg: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        noise = np.random.normal(size=content_shape,
                                 scale=np.std(content) * 0.1)
        initial = tf.random_normal(content_shape) * 0.256
        inferenceImg = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, inferenceImg, pooling)

        # compute content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # compute style loss
        style_loss = 0
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            _, height, width, number = map(lambda i: i.value,
                                           layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(style_layers_weights[style_layer] * 2 *
                                tf.nn.l2_loss(gram - style_gram) /
                                style_gram.size)
        style_loss += style_weight * style_blend_weight * reduce(
            tf.add, style_losses)

        # skip computing total variation denoising to shorten the running time
        # total variation denoising
        # tv_y_size = _tensor_size(inferenceImg[:, 1:, :, :])
        # tv_x_size = _tensor_size(inferenceImg[:, :, 1:, :])
        # tv_loss = tv_weight * 2 * (
        #         (tf.nn.l2_loss(inferenceImg[:, 1:, :, :] - inferenceImg[:, :content_shape[1] - 1, :, :]) /
        #          tv_y_size) +
        #         (tf.nn.l2_loss(inferenceImg[:, :, 1:, :] - inferenceImg[:, :, :content_shape[2] - 1, :]) /
        #          tv_x_size))

        tv_loss = 0
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer training
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in range(iterations):
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = inferenceImg.eval()

                    img_out = vgg.unprocess(best.reshape(content_shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        w, h, _ = original_image.shape
                        combined_yuv = np.empty((w, h, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
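Example #23 skips the total variation term to shorten the running time. For
reference, the commented-out block as a standalone function (a sketch;
tf.size replaces the _tensor_size helper used elsewhere):

import tensorflow as tf

def total_variation(image, tv_weight):
    # image: (1, H, W, C) float tensor
    y_size = tf.to_float(tf.size(image[:, 1:, :, :]))
    x_size = tf.to_float(tf.size(image[:, :, 1:, :]))
    y_tv = tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :-1, :, :])
    x_tv = tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :-1, :])
    return tv_weight * 2 * (y_tv / y_size + x_tv / x_size)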
Beispiel #24
0
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path):
    mod = len(content_targets) % batch_size
    if (mod > 0):
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1, *style_target.shape)
    print('batch shape:', batch_shape)
    print('style shape:', style_shape)

    with tf.Graph().as_default(), tf.Session() as sess:
        # Declare placeholders we'll feed into the graph
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name='X_content')

        # Precompute content features
        start_time = time.time()
        content_features = {}

        X_content_pre = vgg.preprocess(X_content)
        content_net = vgg.net(vgg_path, X_content_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        end_time = time.time()
        delta_time = end_time - start_time
        print('precompute content features time:', delta_time)

        # Precompute style features
        start_time = time.time()
        style_features = {}
        style_pre = np.array([style_target])  # feed

        style_image_pre = vgg.preprocess(style_image)
        style_net = vgg.net(vgg_path, style_image_pre)
        for layer in STYLE_LAYERS:
            features = style_net[layer].eval(
                feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

        end_time = time.time()
        delta_time = end_time - start_time
        print('precompute style features time:', delta_time)

        # Build prediction net
        preds = transform.net(X_content / 255.0)
        preds_pre = vgg.preprocess(preds)
        preds_net = vgg.net(vgg_path, preds_pre)

        # Compute content loss
        start_time = time.time()
        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            preds_net[CONTENT_LAYER])
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(preds_net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)
        end_time = time.time()
        delta_time = end_time - start_time
        print('compute content loss time:', delta_time)

        # Compute style loss
        start_time = time.time()
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = preds_net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)
        style_loss = style_weight * functools.reduce(tf.add,
                                                     style_losses) / batch_size
        end_time = time.time()
        delta_time = end_time - start_time
        print('compute style loss time:', delta_time)

        # Total variation denoising
        start_time = time.time()
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size
        end_time = time.time()
        delta_time = end_time - start_time
        print('total variation denoising time:', delta_time)

        # Overall loss
        start_time = time.time()
        all_loss = content_loss + style_loss + tv_loss
        end_time = time.time()
        delta_time = end_time - start_time
        print('compute overall loss time:', delta_time)

        # Build train
        train = tf.train.AdamOptimizer(learning_rate).minimize(all_loss)

        sess.run(tf.global_variables_initializer())

        print('Start training...')
        start_time = time.time()

        num_examples = len(content_targets)
        n_batches = num_examples // batch_size
        iterations = n_batches * epochs

        # For writing training checkpoints.
        saver = tf.train.Saver()

        for epoch in range(epochs):
            for batch in range(n_batches):
                iteration = epoch * n_batches + batch + 1

                # curr = iteration * batch_size
                # step = curr + batch_size
                curr = batch * batch_size
                step = curr + batch_size

                X_batch = np.zeros(batch_shape, dtype=np.float32)  # feed
                for i, img_p in enumerate(content_targets[curr:step]):
                    X_batch[i] = get_img(img_p,
                                         (256, 256, 3)).astype(np.float32)

                assert X_batch.shape[0] == batch_size
                sess.run(train, feed_dict={X_content: X_batch})

                to_get = [style_loss, content_loss, tv_loss, all_loss, preds]

                if (iteration % display_every_n == 0):
                    tup = sess.run(to_get, feed_dict={X_content: X_batch})
                    _style_loss, _content_loss, _tv_loss, _all_loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _all_loss)
                    print(
                        'Iteration {}/{} - style loss: {:.4f}, content loss: {:.4f}, tv loss: {:.4f}, all loss: {:.4f}'
                        .format(iteration, iterations, *losses))
                if (iteration % save_every_n == 0) or (iteration
                                                       == iterations):
                    _all_loss = sess.run(all_loss,
                                         feed_dict={X_content: X_batch})
                    ckpt = saver.save(
                        sess,
                        os.path.join(FLAGS.checkpoint_dir,
                                     "ckpt_i{}".format(iteration)))
                    print('Epoch {}/{}, Iteration: {}/{}, loss: {}'.format(
                        epoch, epochs, iteration, iterations, _all_loss))
                    yield (epoch, iteration, ckpt)

        end_time = time.time()
        delta_time = end_time - start_time
        print('Done! Train total time:', delta_time)
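Since optimize is a generator yielding (epoch, iteration, checkpoint) tuples,
a caller drives it in a loop. A hypothetical driver (the paths, style image,
and loss weights below are placeholders, not values from the example):

content_files = ['train/%d.jpg' % n for n in range(32)]   # placeholder paths
style_img = get_img('style.jpg')                          # placeholder image

for epoch, iteration, ckpt in optimize(content_files, style_img,
                                       content_weight=7.5,
                                       style_weight=100.0,
                                       tv_weight=200.0,
                                       vgg_path=VGG_PATH):
    print('checkpoint %s written (epoch %d, iteration %d)' %
          (ckpt, epoch, iteration))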
Beispiel #25
0
def stylize(content,
            style,
            initial,
            initial_noiseblend,
            content_weight=5e0,
            content_layer_num=9,
            style_weight=5e2,
            style_layer_weight=(0.2, 0.2, 0.2, 0.2, 0.2),
            tv_weight=1e2,
            learning_rate=1e1,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-8,
            preserve_colors=False,
            pooling='max',
            iterations=1000,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.
    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1, ) + content.shape

    content_features = {}
    style_features = {}
    style_layers_weights = {}
    content_layer = CONTENT_LAYERS[content_layer_num]

    for i, style_layer in enumerate(STYLE_LAYERS):
        style_layers_weights[style_layer] = style_layer_weight[i]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)
    image = tf.placeholder(tf.float32, shape=shape)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
    style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])

    # compute content features,style features in feedforward mode
    with tf.Session() as sess:
        content_features[content_layer] = sess.run(
            net[content_layer], feed_dict={image: content_pre})

        for layer in STYLE_LAYERS:
            features = sess.run(net[layer], feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    # make stylized image using backpropagation
    if initial is None:
        noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = tf.random_normal(shape) * 0.256
    else:
        initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
        initial = initial.astype(np.float32)
        noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
        initial = initial * (1 - initial_noiseblend) + (
            tf.random_normal(shape) * 0.256) * initial_noiseblend
    image = tf.Variable(initial)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    # content loss
    content_loss = content_weight * 2 * tf.nn.l2_loss(
        net[content_layer] -
        content_features[content_layer]) / content_features[content_layer].size

    # style loss
    style_loss = 0
    for style_layer in STYLE_LAYERS:
        layer = net[style_layer]
        _, height, width, number = map(lambda i: i.value, layer.get_shape())
        size = height * width * number
        feats = tf.reshape(layer, (-1, number))
        gram = tf.matmul(tf.transpose(feats), feats) / size
        style_gram = style_features[style_layer]
        style_loss += style_weight * style_layers_weights[
            style_layer] * 2 * tf.nn.l2_loss(gram -
                                             style_gram) / style_gram.size

    # total variation denoising
    tv_y_size = _tensor_size(image[:, 1:, :, :])
    tv_x_size = _tensor_size(image[:, :, 1:, :])
    tv_loss = tv_weight * 2 * (
        (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) /
         tv_y_size) +
        (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]) /
         tv_x_size))

    # overall loss
    loss = content_loss + style_loss + tv_loss

    # optimizer setup
    train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                        epsilon).minimize(loss)

    def print_progress():
        print('  content loss: %g\n' % content_loss.eval())
        print('    style loss: %g\n' % style_loss.eval())
        print('       tv loss: %g\n' % tv_loss.eval())
        print('    total loss: %g\n' % loss.eval())

    # optimization
    best_loss = float('inf')
    best = None
    images = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('Optimization started...\n')
        if (print_iterations and print_iterations != 0):
            print_progress()
        for i in range(iterations):
            train_step.run()

            last_step = (i == iterations - 1)
            if last_step or (print_iterations and i % print_iterations == 0):
                print('Iteration %4d/%4d\n' % (i + 1, iterations))
                print_progress()

            if (checkpoint_iterations
                    and i % checkpoint_iterations == 0) or last_step:
                this_loss = loss.eval()

                styled_image = np.clip(
                    vgg.unprocess(image.eval().reshape(shape[1:]),
                                  vgg_mean_pixel), 0, 255)

                if this_loss < best_loss:
                    best_loss = this_loss
                    best = styled_image

                if preserve_colors:
                    original_image = np.clip(content, 0, 255)

                    # Luminosity transfer steps:
                    # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                    # 2. Convert stylized grayscale into YUV (YCbCr)
                    # 3. Convert original image into YUV (YCbCr)
                    # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                    # 5. Convert recombined image from YUV back to RGB

                    # 1
                    styled_grayscale = rgb2gray(styled_image)
                    styled_grayscale_rgb = gray2rgb(styled_grayscale)

                    # 2
                    styled_grayscale_yuv = np.array(
                        Image.fromarray(styled_grayscale_rgb.astype(
                            np.uint8)).convert('YCbCr'))

                    # 3
                    original_yuv = np.array(
                        Image.fromarray(original_image.astype(
                            np.uint8)).convert('YCbCr'))

                    # 4
                    h, w, _ = original_image.shape
                    combined_yuv = np.empty((h, w, 3), dtype=np.uint8)
                    combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                    combined_yuv[..., 1] = original_yuv[..., 1]
                    combined_yuv[..., 2] = original_yuv[..., 2]

                    # 5
                    styled_image = np.array(
                        Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

                plt.figure(figsize=(8, 8))
                plt.imshow(styled_image.astype(np.uint8))
                plt.axis('off')
                plt.show()

                images.append(styled_image.astype(np.uint8))

    return images, best
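
For reference, a minimal sketch of how this variant of stylize might be invoked. The image files, VGG weights path, and hyperparameter values below are illustrative assumptions, not taken from the original example:

# Hypothetical usage sketch (all paths and values are assumptions).
content = imread('content.jpg')   # imread assumed to return an HxWx3 array
style = imread('style.jpg')

images, best = stylize(
    network='imagenet-vgg-verydeep-19.mat',
    initial=None, initial_noiseblend=0.0,
    content=content, styles=[style],
    preserve_colors=False, iterations=500,
    content_weight=5.0, content_weight_blend=1.0,
    style_weight=500.0, style_layer_weight_exp=1.0,
    style_blend_weights=[1.0], tv_weight=100.0,
    learning_rate=10.0, beta1=0.9, beta2=0.999, epsilon=1e-8,
    pooling='max', exp_sigma=1.0, mat_sigma=1.0, mat_rho=1.0,
    text_to_print='', checkpoint_iterations=50)
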
style_batch = np.zeros([batch_size, 128, 128])

for i in range(batch_size):
    style_batch[i] = ss

ss = np.reshape(ss, [-1, 128, 128, 1])

style_features = {}

# precompute style features
with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:

    style_image = tf.placeholder(tf.float32,
                                 shape=[None, 128, 128, 1],
                                 name='style_image')
    style_image_pre = vgg.preprocess(style_image)
    net = vgg.net(vgg_path, style_image_pre)

    for layer in STYLE_LAYERS:
        features = net[layer].eval(feed_dict={style_image: ss})
        features = np.reshape(features, (-1, features.shape[3]))
        gram = np.matmul(features.T, features) / features.size
        style_features[layer] = gram
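
The Gram computation above is the core of every style loss in these examples. A small self-contained NumPy sketch of the same operation on a dummy feature map (shapes chosen arbitrarily for illustration):

import numpy as np

# Dummy feature map: batch=1, H=4, W=4, C=3 (arbitrary illustration values).
features = np.random.rand(1, 4, 4, 3).astype(np.float32)
flat = np.reshape(features, (-1, features.shape[3]))  # (H*W, C)
gram = np.matmul(flat.T, flat) / flat.size            # (C, C)

assert gram.shape == (3, 3)
assert np.allclose(gram, gram.T)  # Gram matrices are symmetric
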
'''
stage 1: generate depth images from joints distribution
'''
gen = Generator
img_gen = gen.generator(X_in_label)
img_gen_trans = tf.reshape(img_gen, [-1, 128, 128])
# loss_generator = tf.reduce_mean(tf.abs(img_gen_trans - X_in_image))
loss_generator = tf.reduce_mean(
Beispiel #27
0
def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        init = tf.random_normal(shape) * 256 / 1000
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        tv_loss = (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) +
                tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]))
        loss = ALPHA * content_loss + BETA * style_loss + TV_WEIGHT * tv_loss

        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(100000):
                print('i = %d' % i)
                if i % 10 == 0:
                    print('\tcontent_loss = %15.0f' % content_loss.eval())
                    print('\tstyle_loss   = %15.0f' % style_loss.eval())
                    print('\ttv_loss      = %15.0f' % tv_loss.eval())
                    print('\tloss         = %15.0f' % loss.eval())
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()
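
This main reads its four positional arguments straight from sys.argv. A hypothetical invocation (the script name is an assumption; the argument order comes from the code above):

# python stylize_simple.py content.jpg style.jpg 512 1.0
# i.e. content_path='content.jpg', style_path='style.jpg', width=512, style_scale=1.0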
Beispiel #28
0
def main():
    '''Search for similar images

    Search the style directory for images that closely resemble each image in
    the content directory. Save those images in an output directory folder
    corresponding to each content image, renamed as their matching rank number.
    '''

    parser = build_parser()
    options = parser.parse_args()

    content_files = os.listdir(options.content_dir)
    content_images = [
        read_img(os.path.join(options.content_dir, f)) for f in content_files
    ]

    # n_content by n_style matrix and list to store the best style images
    n_content = len(content_files)
    n_total = n_content * options.n_style
    best_style_score = np.inf * np.ones((n_content, options.n_style))
    best_style_file = np.array([['' for i in range(options.n_style)]
                                for h in range(n_content)],
                               dtype=object)

    vgg_weights, vgg_mean_pixel = vgg.load_net(options.network)

    content_features = [{} for _ in content_images]
    for i, c in enumerate(content_images):
        with tf.Graph().as_default(), tf.Session() as sess:
            image = tf.placeholder('float', shape=(1, ) + c.shape)
            net = vgg.net_preloaded(vgg_weights, image, 'max')
            content_pre = np.array([vgg.preprocess(c, vgg_mean_pixel)])
            for layer in CONTENT_LAYERS:
                content_features[i][layer] = net[layer].eval(
                    feed_dict={image: content_pre})

    final_style_score, final_style_file = search_dir(
        content_features, vgg_weights, vgg_mean_pixel, best_style_score,
        best_style_file, options.style_dir, options.recurse, options.n_search)

    if np.any(np.isinf(final_style_score)):
        inf_total = np.sum(np.isinf(final_style_score))
        raise RuntimeError(
            '%d out of %d style images not found. '
            'Try rerunning with a smaller n-style.' % (inf_total, n_total))

    sorted_files = final_style_file[np.indices(
        (n_content, options.n_style))[0],
                                    final_style_score.argsort()]

    format_str = '{0:0>%d}.{1}' % np.ceil(np.log10(n_total))
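    # For illustration: with n_total = 100, np.ceil(np.log10(100)) == 2, so
    # format_str == '{0:0>2}.{1}' and format_str.format(3, 'jpg') -> '03.jpg'.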

    os.mkdir(options.output_dir)
    for i, f in enumerate(content_files):
        fname = ''.join(f.split('.')[:-1])
        print('Copying style files for %s' % fname)
        os.mkdir(os.path.join(options.output_dir, fname))
        for j in range(options.n_style):
            print(sorted_files[i, j])
            img_ext = sorted_files[i, j].split('.')[-1]
            shutil.copy(
                sorted_files[i, j],
                os.path.join(options.output_dir, fname,
                             format_str.format(j, img_ext)))
Beispiel #29
0
def optimize(content_targets,
             style_target,
             content_weight,
             style_weight,
             tv_weight,
             vgg_path,
             epochs=2,
             print_iterations=1000,
             batch_size=4,
             save_path='saver/fns.ckpt',
             slow=False,
             learning_rate=1e-3,
             device='/cpu:0',
             debug=False,
             total_iterations=-1,
             base_model_path=None):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}

    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1, ) + style_target.shape
    print(style_shape)

    # precompute style features
    print("Precomputing style features")
    sys.stdout.flush()
    with tf.Graph().as_default(), tf.device(device), tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32,
                                   shape=batch_shape,
                                   name="X_content")
        X_pre = vgg.preprocess(X_content)

        print("Precomputing content features")
        sys.stdout.flush()

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256)
            preds_pre = preds
        else:
            preds = transform.net(X_content / 255.0)
            preds_pre = vgg.preprocess(preds)

        print("Building VGG net")
        sys.stdout.flush()
        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(
            content_features[CONTENT_LAYER]) * batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(
            net[CONTENT_LAYER])
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] -
                              content_features[CONTENT_LAYER]) / content_size)

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i: i.value,
                                             layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            # see https://github.com/tensorflow/tensorflow/issues/6560
            grams = tf.matmul(feats_T, feats) / size
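            # Shapes: feats is (bs, H*W, C) and feats_T is (bs, C, H*W), so
            # grams is a batch of (C, C) Gram matrices, one per image.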
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram) /
                                style_gram.size)

        style_loss = style_weight * reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] -
                             preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] -
                             preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size +
                                   y_tv / tv_y_size) / batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.initialize_all_variables())

        # If base model file is present, load that in to the session
        if base_model_path:
            saver = tf.train.Saver()
            if os.path.isdir(base_model_path):
                ckpt = tf.train.get_checkpoint_state(base_model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    raise Exception("No checkpoint found...")
            else:
                saver.restore(sess, base_model_path)

        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        sys.stdout.flush()
        for epoch in range(epochs):
            num_examples = len(content_targets)
            print("number of examples: %s" % num_examples)
            sys.stdout.flush()
            iterations = 0
            while iterations * batch_size < num_examples:
                print("Current iteration : %s" % iterations)
                sys.stdout.flush()

                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p,
                                         (256, 256, 3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = False
                if epoch == epochs - 1 and iterations * batch_size >= num_examples:
                    is_last = True
                if total_iterations > 0 and iterations >= total_iterations:
                    is_last = True
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
                if is_last:
                    break
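
Since optimize is a generator, training only proceeds while the caller iterates over it. A minimal consumption sketch; the content list, style image, weights, and paths below are illustrative assumptions:

# Hypothetical driver loop (paths and weights are assumptions).
content_files = list_training_images()  # hypothetical helper returning image paths
style = get_img('style.jpg')

for preds, losses, i, epoch in optimize(
        content_files, style,
        content_weight=7.5, style_weight=100.0, tv_weight=200.0,
        vgg_path='imagenet-vgg-verydeep-19.mat',
        epochs=2, print_iterations=1000, batch_size=4):
    _style_loss, _content_loss, _tv_loss, _loss = losses
    print('epoch %d, iteration %d, total loss %g' % (epoch, i, _loss))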
Beispiel #30
0
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False, max_sample=4000):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    if len(content_targets) > max_sample:
        content_targets = content_targets[:max_sample]

    style_features = {}

    batch_shape = (batch_size,256,256,3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image:style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content/255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER])*batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i:i.value,layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0,2,1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.initialize_all_variables())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256,256,3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}

                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver = tf.train.Saver()
                        res = saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
Beispiel #31
0
def main():
    # This will print all array values in full
    np.set_printoptions(threshold=np.nan)

    parser = build_parser()
    options = parser.parse_args()

    if not os.path.isfile(options.network):
        parser.error(
            "Network %s does not exist. (Did you forget to download it?)" %
            options.network)

    # Load the vgg weights in advance
    vgg_weights, vgg_mean_pixel = vgg.load_net(options.network)
    content_image = imread(options.content)

    # Jacob: moved this here since the same image features will be used for each style image
    content_features = {}
    g = tf.Graph()
    shape = (1, ) + content_image.shape
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, options.pooling)
        content_pre = np.array([vgg.preprocess(content_image, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    print("READY")
    # Make sure Java can sense this output before Python blocks waiting for input
    sys.stdout.flush()
    count = 0
    #for style in style_images: # loop through separate style inputs individually
    for line in sys.stdin:
        # Assumes a single line of input will be a json for one image
        style = jsonimread(line)

        width = options.width
        if width is not None:
            new_shape = (int(
                math.floor(
                    float(content_image.shape[0]) / content_image.shape[1] *
                    width)), width)
            content_image = scipy.misc.imresize(content_image, new_shape)
        target_shape = content_image.shape
        # This batch of code was in a loop for each style input before
        style_scale = STYLE_SCALE
        if options.style_scales is not None:
            style_scale = options.style_scales[0]  # only one style input remains
        style = scipy.misc.imresize(
            style, style_scale * target_shape[1] / style.shape[1])

        # Removed code for blending between multiple styles
        style_blend_weights = [1.0]

        initial = options.initial
        if initial is not None:
            initial = scipy.misc.imresize(imread(initial),
                                          content_image.shape[:2])
            # Initial guess is specified, but not noiseblend - no noise should be blended
            if options.initial_noiseblend is None:
                options.initial_noiseblend = 0.0
        else:
            # Neither initial nor noiseblend is provided; fall back to a randomly generated initial guess
            if options.initial_noiseblend is None:
                options.initial_noiseblend = 1.0
            if options.initial_noiseblend < 1.0:
                initial = content_image

        if options.checkpoint_output and "%s" not in options.checkpoint_output:
            parser.error("To save intermediate images, the checkpoint output "
                         "parameter must contain `%s` (e.g. `foo%s.jpg`)")

        for iteration, image in stylize(
                network=options.network,
                initial=initial,
                initial_noiseblend=options.initial_noiseblend,
                content=content_image,
                styles=[style],  # Changed this to be a list of only one style image
                preserve_colors=options.preserve_colors,
                iterations=options.iterations,
                content_weight=options.content_weight,
                content_weight_blend=options.content_weight_blend,
                style_weight=options.style_weight,
                style_layer_weight_exp=options.style_layer_weight_exp,
                style_blend_weights=style_blend_weights,
                tv_weight=options.tv_weight,
                learning_rate=options.learning_rate,
                beta1=options.beta1,
                beta2=options.beta2,
                epsilon=options.epsilon,
                pooling=options.pooling,
                print_iterations=options.print_iterations,
                checkpoint_iterations=options.checkpoint_iterations,
                # These vgg settings are now loaded only once
                vgg_weights=vgg_weights,
                vgg_mean_pixel=vgg_mean_pixel,
                content_features=content_features):
            output_file = None
            combined_rgb = image
            if iteration is not None:
                if options.checkpoint_output:
                    output_file = options.checkpoint_output % iteration
            else:
                # Change final output files to simply be numbered
                output_file = "%d.JPG" % count
                count = count + 1
            if output_file:
                # No longer save image to file
                #imsave(output_file, combined_rgb)
                # Output json String
                print(json.dumps(combined_rgb.tolist()))
                # Make sure Java can sense this output before Python blocks waiting for input
                sys.stdout.flush()
    print("DONE")
Beispiel #32
0
def stylize(network,
            initial,
            initial_noiseblend,
            content,
            styles,
            luminance_transfer,
            iterations,
            content_weight,
            content_weight_blend,
            style_weight,
            style_layer_weight_exp,
            style_blend_weights,
            tv_weight,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            pooling,
            print_iterations=None,
            checkpoint_iterations=None):
    """
    This function yields tuples (iteration, image).
    `iteration` is None if this is the final image (the last iteration).
    Other tuples are yielded every `checkpoint_iterations` iterations.
    """
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {
            'relu4_2': content_weight_blend,
            'relu5_2': 1.0 - content_weight_blend
        }
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            for i in tqdm(range(iterations)):
                #stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if luminance_transfer:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized image into YUV (YCbCr)
                        # 2. Convert original image into YUV (YCbCr)
                        # 3. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 4. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(styled_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 2
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 3
                        h, w, _ = original_image.shape
                        combined_yuv = np.empty((h, w, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 4
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
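
The luminance-only transfer inlined above (and again in Beispiel #34) can be distilled into a small standalone helper. A sketch of the same YCbCr recombination, written for illustration rather than taken from the original:

import numpy as np
from PIL import Image

def transfer_luminance_only(stylized, original):
    # Keep the stylized Y channel, restore the original Cb/Cr channels.
    stylized_ycc = np.array(
        Image.fromarray(np.clip(stylized, 0, 255).astype(np.uint8)).convert('YCbCr'))
    original_ycc = np.array(
        Image.fromarray(np.clip(original, 0, 255).astype(np.uint8)).convert('YCbCr'))
    combined = original_ycc.copy()
    combined[..., 0] = stylized_ycc[..., 0]  # luminance from the stylized image
    return np.array(Image.fromarray(combined, 'YCbCr').convert('RGB'))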
Beispiel #33
0
def main():
    global options, device

    # Get the ENV context
    script_dir = os.path.dirname(__file__)
    env = os.environ.copy()

    # Set the input folder
    input_dir = os.path.expanduser(options.input_dir) if options.input_dir \
        else os.path.join(script_dir, '..', 'data')
    vgg_path = os.path.join(input_dir, 'vgg', 'imagenet-vgg-verydeep-19.mat')
    coco_dir = os.path.join(input_dir, 'train')
    if not os.path.isdir(input_dir):
        fail('Failed to find the input folder at ' + input_dir)
    if not os.path.isfile(vgg_path):
        error('Failed to find the VGG model file at ' + vgg_path)
        fail(
            'Please download it from http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat'
        )
    if not os.path.isdir(coco_dir):
        error('Failed to find the COCO 2014 training images in ' + coco_dir)
        fail(
            'Please download it from http://images.cocodataset.org/zips/train2014.zip'
        )

    # Set the output folder
    output_dir = os.path.expanduser(options.output_dir) if options.output_dir \
        else env.get('OUTPUT_DIR', os.path.join(script_dir, '..', 'output'))
    model_dir = os.path.join(output_dir, 'checkpoint')
    export_dir = os.path.join(output_dir, 'savedmodel')
    if os.path.isdir(output_dir):
        if not os.path.isdir(model_dir):
            info('Creating a folder to store checkpoint at ' + model_dir)
            os.makedirs(model_dir)
        if os.path.isdir(export_dir):
            info('Deleting the folder containing SavedModel at ' + export_dir)
            shutil.rmtree(export_dir)
    else:
        info('Creating a folder to store checkpoint at ' + model_dir)
        os.makedirs(model_dir)

    # Set the TensorBoard folder
    log_dir = os.path.expanduser(options.log_dir) if options.log_dir \
        else env.get('LOG_DIR', os.path.join(script_dir, '..', 'log'))
    if not os.path.isdir(log_dir):
        info('Creating a folder to store TensorBoard events at ' + log_dir)
        os.makedirs(log_dir)

    # Set the style image path
    style_path = os.path.expanduser(options.style_image) if os.path.isfile(options.style_image) \
        else os.path.join(input_dir, 'style_images', options.style_image)
    style_name = os.path.basename(os.path.splitext(style_path)[0])
    ckpt_path = os.path.join(model_dir, style_name + '.ckpt')
    if not os.path.isfile(style_path):
        fail('Failed to find the style image at ' + style_path)

    # Set hyperparameters
    batch_size = options.batch_size
    epochs = options.epoch
    lr = options.lr
    lambda_tv = options.lambda_tv
    lambda_feat = options.lambda_feat
    lambda_style = options.lambda_style

    # Print parsed arguments
    info('--------- Training parameters -------->')
    info('Style image path: ' + style_path)
    info('VGG model path: ' + vgg_path)
    info('Training image dir: ' + coco_dir)
    info('Checkpoint path: ' + ckpt_path)
    info('TensorBoard log dir: ' + log_dir)
    info('Training device: ' + device)
    info('Batch size: %d' % batch_size)
    info('Epoch count: %d' % epochs)
    info('Learning rate: ' + str(lr))
    info('Lambda tv: ' + str(lambda_tv))
    info('Lambda feat: ' + str(lambda_feat))
    info('Lambda style: ' + str(lambda_style))
    info('<-------- Training parameters ---------')

    # COCO images to train
    content_targets = list_jpgs(coco_dir)
    if len(content_targets) % batch_size != 0:
        content_targets = content_targets[:-(len(content_targets) %
                                             batch_size)]
    info('Total training data size: %d' % len(content_targets))

    # Image shape
    image_shape = (224, 224, 3)
    batch_shape = (batch_size, ) + image_shape

    # Style target
    style_target = read_img(style_path)
    style_shape = (1, ) + style_target.shape

    with tf.device(device), tf.Session() as sess:
        # Compute the Gram matrix of the style target
        style_image = tf.placeholder(tf.float32,
                                     shape=style_shape,
                                     name='style_image')
        vggstyletarget = vgg.net(vgg_path, vgg.preprocess(style_image))
        style_vgg = vgg.get_style_vgg(vggstyletarget, style_image,
                                      np.array([style_target]))

        # Content target feature
        content_vgg = {}
        inputs = tf.placeholder(tf.float32, shape=batch_shape, name='inputs')
        content_net = vgg.net(vgg_path, vgg.preprocess(inputs))
        content_vgg['relu4_2'] = content_net['relu4_2']

        # Feature after transformation
        outputs = stylenet.net(inputs / 255.0)
        vggoutputs = vgg.net(vgg_path, vgg.preprocess(outputs))

        # Compute feature loss
        loss_f = options.lambda_feat * vgg.total_content_loss(
            vggoutputs, content_vgg, batch_size)

        # Compute style loss
        loss_s = options.lambda_style * vgg.total_style_loss(
            vggoutputs, style_vgg, batch_size)

        # Total variation denoising
        loss_tv = options.lambda_tv * vgg.total_variation_regularization(
            outputs, batch_size, batch_shape)

        # Total loss
        total_loss = loss_f + loss_s + loss_tv
        train_step = tf.train.AdamOptimizer(options.lr).minimize(total_loss)

        # Create summary
        tf.summary.scalar('loss', total_loss)
        merged = tf.summary.merge_all()

        # Used to save model
        saver = tf.train.Saver()
        builder = tf.saved_model.builder.SavedModelBuilder(export_dir)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Restore checkpoint if available
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            info('Restoring from ' + ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

        # Write the graph
        writer = tf.summary.FileWriter(log_dir, sess.graph)

        # Start to train
        total_step = 0
        for epoch in range(epochs):
            info('epoch: %d' % epoch)
            step = 0
            while step * batch_size < len(content_targets):
                time_start = time.time()

                # Load one batch
                batch = np.zeros(batch_shape, dtype=np.float32)
                for i, img in enumerate(
                        content_targets[step * batch_size:(step + 1) *
                                        batch_size]):
                    batch[i] = read_img(img, image_shape).astype(
                        np.float32)  # (224,224,3)

                # Proceed one step
                step += 1
                total_step += 1
                _, loss, summary = sess.run([train_step, total_loss, merged],
                                            feed_dict={inputs: batch})

                time_elapse = time.time() - time_start
                if total_step % 5 == 0:
                    info('[step {}] elapse time: {} loss: {}'.format(
                        total_step, time_elapse, loss))
                    writer.add_summary(summary, total_step)

                # Write checkpoint
                if total_step % 2000 == 0:
                    info('Saving checkpoint to ' + ckpt_path)
                    saver.save(sess, ckpt_path, global_step=total_step)

        info('Exporting SavedModel to ' + export_dir)
        serving_signatures = {
            'Transfer': #tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.saved_model.signature_def_utils.predict_signature_def(
                { tf.saved_model.signature_constants.PREDICT_INPUTS: inputs },
                { tf.saved_model.signature_constants.PREDICT_OUTPUTS: outputs }
            )
        }
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map=serving_signatures,
            clear_devices=True)
        builder.save()
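
Once exported, the SavedModel above can be reloaded for inference with the TF1 loader API. A minimal sketch, assuming the same export_dir and the 'Transfer' signature defined during export; the input batch is a placeholder and must match the exported batch_shape:

import numpy as np
import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    meta = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    sig = meta.signature_def['Transfer']
    in_name = sig.inputs[tf.saved_model.signature_constants.PREDICT_INPUTS].name
    out_name = sig.outputs[tf.saved_model.signature_constants.PREDICT_OUTPUTS].name

    batch = np.zeros(batch_shape, dtype=np.float32)  # placeholder input batch
    stylized = sess.run(out_name, feed_dict={in_name: batch})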
Beispiel #34
0
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = vgg.load_net(network)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([vgg.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if (print_iterations and print_iterations != 0):
                print_progress()
            iteration_times = []
            start = time.time()
            for i in range(iterations):
                iteration_start = time.time()
                if i > 0:
                    elapsed = time.time() - start
                    # take average of last couple steps to get time per iteration
                    remaining = np.mean(iteration_times[-10:]) * (iterations - i)
                    stderr.write('Iteration %4d/%4d (%s elapsed, %s remaining)\n' % (
                        i + 1,
                        iterations,
                        hms(elapsed),
                        hms(remaining)
                    ))
                else:
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        h, w, _ = original_image.shape
                        combined_yuv = np.empty((h, w, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )

                iteration_end = time.time()
                iteration_times.append(iteration_end - iteration_start)
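
The hms helper used in the progress line above is not defined in this example. A minimal sketch of what it presumably does (formatting an elapsed-seconds count for display), written as an assumption:

def hms(seconds):
    # Format a duration in seconds as a short h/min/sec string (illustrative).
    seconds = int(seconds)
    hours, rem = divmod(seconds, 3600)
    minutes, seconds = divmod(rem, 60)
    if hours > 0:
        return '%d hr %d min' % (hours, minutes)
    if minutes > 0:
        return '%d min %d sec' % (minutes, seconds)
    return '%d sec' % seconds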
Beispiel #35
0
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, use_IN, epochs=2, print_iterations=1000,
             batch_size=4, save_path='checkpoints/fast_style_transfer.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1

    # content_targets is a list of image files; trim it so its length is a
    # multiple of the batch size. With a single content image, mod is 0.
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly...")
        content_targets = content_targets[:-mod]

    # Training images are resized to 256 x 256 by get_img and enter the
    # TensorFlow graph through the optimizer's feed_dict.
    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1,) + style_target.shape # add 1 in the front for batch size, 4-D.
    print(f"batch_shape of the content image is: {batch_shape}")
    print(f"style_shape of the style image is: {style_shape}")

    ### Graph Construction ###
    # VGG is not trained here; vgg.py loads fixed weights from the MATLAB file.
    # computed vgg style features in gram matrices
    # tf.device('/cpu:0')
    config = v1.ConfigProto()
    config.gpu_options.allow_growth = True

    style_features = {}
    with tf.Graph().as_default(), v1.Session(config=config) as sess:
        style_image = v1.placeholder(tf.float32, shape=style_shape, name='style_image') # 4-D placeholder for feed_dict
        vgg_style_net = vgg.net(vgg_path, vgg.preprocess(style_image)) # extract feature volume
        np_style_target = np.array([style_target]) # a 3-D numpy array for feed_dict's input

        for layer in STYLE_LAYERS:
            # vgg_style_net[layer] is a tf.Tensor returned by tf.nn.relu,
            # eval at that layer, by running forward to that vgg layer or entire network.
            features = vgg_style_net[layer].eval(feed_dict={style_image:np_style_target}) # extract a fVol value
            features = np.reshape(features, (-1, features.shape[3])) # (N*H*W, C)
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    # computed vgg content feature map and both losses
    with tf.Graph().as_default(), v1.Session(config=config) as sess:
        X_content = v1.placeholder(tf.float32, shape=batch_shape, name="X_content") # 4-D
        vgg_content_net = vgg.net(vgg_path, vgg.preprocess(X_content)) # run ground truth image through the pre-trained model

        # The predicted image runs through the feed-forward conv net, then
        # through VGG to extract its feature-volume predictions.
        if slow:
            preds = tf.Variable(
                tf.random.normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content/255.0, use_IN) # run through the style feed forward network. why need to normalize pixel to 0-1?
        net = vgg.net(vgg_path, vgg.preprocess(preds)) # run generated image through the pre-trained model

        # _tensor_size is a reduce function only count from [1:],
        # so it doesn't have batch_size information.
        content_size = _tensor_size(vgg_content_net[CONTENT_LAYER]) * batch_size
        vgg_content_net_size = _tensor_size(vgg_content_net[CONTENT_LAYER])
        vgg_transform_content_net_size = _tensor_size(net[CONTENT_LAYER])
        # print(f"vgg_content_net_size is {vgg_content_net_size}")
        # print(vgg_content_net[CONTENT_LAYER])
        # print(f"vgg_transform_content_net_size is {vgg_transform_content_net_size}")
        # print(net[CONTENT_LAYER])
        assert vgg_content_net_size == vgg_transform_content_net_size

        # define loss functions
        # content loss
        content_l2_loss = 2 * tf.nn.l2_loss(net[CONTENT_LAYER] - vgg_content_net[CONTENT_LAYER])
        content_loss = content_weight * (content_l2_loss / content_size)

        # style loss
        style_l2_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            N, H, W, C = layer.get_shape()
            feats = tf.reshape(layer, (N, H*W, C))        # N, HW, C
            feats_T = tf.transpose(feats, perm=[0, 2, 1]) # N, C, HW
            pred_gram = tf.matmul(feats_T, feats) / (H * W * C)
            true_gram = style_features[style_layer] # numpy array

            style_l2_loss = 2 * tf.nn.l2_loss(pred_gram - true_gram)
            style_l2_losses.append(style_l2_loss / true_gram.size)
        style_loss = style_weight * functools.reduce(tf.add, style_l2_losses) / batch_size
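        # pred_gram has shape (N, C, C) while true_gram is (C, C); the
        # subtraction broadcasts the target Gram across the batch.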

        # total variation denoising regularization loss
        # (disabled: likely unnecessary with nearest-neighbor upsampling and mirror padding)
        # tv_y_size = _tensor_size(preds[:,1:,:,:])
        # tv_x_size = _tensor_size(preds[:,:,1:,:])
        # # N, H, W, C
        # y_tv = 2 * tf.nn.l2_loss(preds[:, 1:, :, :] - preds[:, :batch_shape[1]-1, :, :]) # H, down - up
        # x_tv = 2 * tf.nn.l2_loss(preds[:, :, 1:, :] - preds[:, :, :batch_shape[2]-1, :]) # W, right - left
        # tv_loss = tv_weight * (x_tv/tv_x_size + y_tv/tv_y_size) / batch_size

        # total loss
        # total_loss = content_loss + style_loss + tv_loss
        total_loss = content_loss + style_loss

        # train the feed-forward net and save weights to a checkpoint
        import random
        uid = random.randint(1, 100)
        print("This random UID is: %s" % uid)

        optimizer = v1.train.AdamOptimizer(learning_rate).minimize(total_loss)
        sess.run(v1.global_variables_initializer())
        for epoch in range(epochs): # epoch loop
            iterations = 0
            num_examples = len(content_targets) # number of training images (e.g. a COCO train2014 subset)
            while iterations * batch_size < num_examples: # batch loop
                # start training a batch
                start_time = time.time()

                X_batch = np.zeros(batch_shape, dtype=np.float32)
                start = iterations * batch_size
                end = iterations * batch_size + batch_size
                for i, img_p in enumerate(content_targets[start:end]): # img_p is a COCO image path
                    X_batch[i] = get_img(img_p, (256,256,3)).astype(np.float32) # resize to 256x256

                optimizer.run(feed_dict={X_content:X_batch})

                end_time = time.time()
                # end training a batch

                # update training information
                iterations += 1
                is_print_iter = int(iterations) % print_iterations == 0
                is_last_train = epoch == epochs - 1 and iterations * batch_size >= num_examples

                if slow:
                    is_print_iter = epoch % print_iterations == 0
                if debug:
                    print("UID: %s, batch training time: %s" % (uid, end_time - start_time))
                # monitor the training losses
                if is_print_iter or is_last_train:
                    _style_loss, _content_loss, _total_loss, _preds = \
                        sess.run([style_loss, content_loss, total_loss, preds],
                                  feed_dict={X_content:X_batch})
                    losses = (_style_loss, _content_loss, _total_loss)
                    generated_image = _preds

                    if slow:
                        generated_image = vgg.unprocess(generated_image)
                    else:
                        res = v1.train.Saver().save(sess, save_path)
                    yield (generated_image, losses, iterations, epoch)
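
# Self-contained NumPy sketch of the batched Gram computation used in the
# style loss above; shapes are kept small for readability, and this is
# illustrative rather than part of the training code.
import numpy as np

def batched_gram(feats):
    # feats: (N, H, W, C) batch of feature volumes from one VGG layer
    N, H, W, C = feats.shape
    F = feats.reshape(N, H * W, C)                            # (N, HW, C)
    return np.matmul(F.transpose(0, 2, 1), F) / (H * W * C)   # (N, C, C)

print(batched_gram(np.random.rand(2, 4, 4, 3)).shape)  # -> (2, 3, 3)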
Example #36
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    from functools import reduce  # Python 3: reduce lives in functools

    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        def print_progress(i, last=False):
            global timenow
            stderr.write('Iteration %d/%d, time: %dms\n' % (i + 1, iterations, current_milli_time() - timenow))
            timenow = current_milli_time()
            if last or (print_iterations and i % print_iterations == 0):
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i, last=last_step)
                train_step.run()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield (
                        (None if last_step else i),
                        vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    )
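
# Self-contained NumPy sketch of the total-variation term used above,
# assuming a batch size of 1 so the slice sizes match _tensor_size's
# batch-free count; illustrative only.
import numpy as np

def tv_loss_np(img, tv_weight=1e2):
    # img: (1, H, W, C); penalize squared differences between vertically
    # and horizontally adjacent pixels, each normalized by its slice size.
    y_diff = img[:, 1:, :, :] - img[:, :-1, :, :]
    x_diff = img[:, :, 1:, :] - img[:, :, :-1, :]
    y_term = (y_diff ** 2).sum() / 2 / y_diff.size  # matches tf.nn.l2_loss
    x_term = (x_diff ** 2).sum() / 2 / x_diff.size
    return tv_weight * 2 * (y_term + x_term)

print(tv_loss_np(np.random.rand(1, 8, 8, 3)))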
def model_neural_style(pre_train_vgg_path,
                       content_image,
                       style_images,
                       content_weight=5e0,
                       content_weight_blend=1.0,
                       style_weight=5e2,
                       style_layer_weight_exp=1.0,
                       pooling='',
                       initial=None,
                       initial_noiseblend=1.0,
                       tv_weight=1e2,
                       learning_rate=1e1,
                       beta1=0.9,
                       beta2=0.999,
                       epsilon=1e-08,
                       print_iterations=None,
                       iterations=500,
                       checkpoint_iterations=50,
                       preserve_colors=None,
                       style_blend_weights=None):
    print "++++++++++++++++++++"
    # input shape of model
    shape = (1, ) + content_image.shape
    style_images_shapes = [(1, ) + style_image.shape
                           for style_image in style_images]
    content_features = {}
    style_features = [{} for _ in style_images]

    # load the weights of pretrained vgg model
    vgg_weights, vgg_mean_pixel = vgg.load_weights(pre_train_vgg_path)

    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum
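    # e.g. with style_layer_weight_exp=2.0 and five style layers the raw
    # weights 1, 2, 4, 8, 16 are rescaled to sum to 1, biasing the style
    # loss toward deeper layers.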

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = vgg.net_infer(vgg_weights, image, pooling)
        content_pre = np.array([vgg.preprocess(content_image, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})
    # # for debug
    # for layer in CONTENT_LAYERS:
    #     item = content_features[layer]
    #     item = item.reshape(item.shape[1], item.shape[2], item.shape[3])
    #     item_for_plot = []
    #     for i in range(item.shape[2]):
    #         item_for_plot.append(item[:, :, i])
    #
    #     tools.show_images(item_for_plot[::8], cols=8)
    # compute style features in feedforward mode
    for i in range(len(style_images)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_images_shapes[i])
            net = vgg.net_infer(vgg_weights, image, pooling)
            style_pre = np.array(
                [vgg.preprocess(style_images[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend
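    # initial_noiseblend is the fraction of random noise mixed into the
    # provided initial image; 1.0 means pure noise, 0.0 keeps it unchanged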

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape,
                                     scale=np.std(content_image) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            noise = np.random.normal(size=shape,
                                     scale=np.std(content_image) * 0.1)
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = vgg.net_infer(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend
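        # content_weight_blend splits the content weight between relu4_2 and
        # relu5_2; 1.0 uses only relu4_2, smaller values shift weight toward
        # the more abstract relu5_2 features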
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        # sum the per-layer content losses once, after the loop
        content_loss += reduce(tf.add, content_losses)
        # style loss
        style_loss = 0
        for i in range(len(style_images)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 *
                                    tf.nn.l2_loss(gram - style_gram) /
                                    style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
             / tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2,
                                            epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations
                                 and i % print_iterations == 0):
                    print_progress()
                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()

                    img_out = vgg.unprocess(best.reshape(shape[1:]),
                                            vgg_mean_pixel)

                    if preserve_colors:
                        original_image = np.clip(content_image, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(
                            Image.fromarray(
                                styled_grayscale_rgb.astype(
                                    np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(
                            Image.fromarray(original_image.astype(
                                np.uint8)).convert('YCbCr'))

                        # 4
                        h, w, _ = original_image.shape
                        combined_yuv = np.empty((h, w, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(
                            Image.fromarray(combined_yuv,
                                            'YCbCr').convert('RGB'))

                    yield ((None if last_step else i), img_out)
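
# A hypothetical driver for model_neural_style above; the image paths and
# the VGG weights file name are assumptions for illustration.
import numpy as np
from PIL import Image

content = np.asarray(Image.open('content.jpg'), dtype=np.float32)
style = np.asarray(Image.open('style.jpg'), dtype=np.float32)

for step, img in model_neural_style('imagenet-vgg-verydeep-19.mat',
                                    content, [style],
                                    style_blend_weights=[1.0]):
    if step is None:  # final iteration
        Image.fromarray(np.clip(img, 0, 255).astype(np.uint8)).save('out.jpg')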
Example #38
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    from functools import reduce  # Python 3: reduce lives in functools

    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:', features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                #mask = np.zeros_like(features)
                #mask[:49664//2, :] = 1
                #print('Mask shape', mask.shape)
                print('Final features shape:', features.shape)
                #features = features*mask
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape:', gram.shape)
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print('Height, width, number:', height, width, number)
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                
                #print(tf.shape(feats).as_list())
                print('Height', height)
                print('Width', width)
                print('Number', number)
                print('Style features shape', style_features[i][style_layer].shape)
                print(style_layer)
                
                if style_layer == 'relu2_1':
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    temp = imread('emma/emma_test_mask.jpg').astype(np.float32)
                    c = temp.reshape(height,2,width,2)
                    temp = c.max(axis=1).max(axis=2)
                    print(temp.shape)
                    maskt = np.reshape(temp, (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:,d] = maskt
                    print('Mask shape', mask.shape)
                    #b = mask.reshape(height*width*2, 2, number//2, 2)
                    #mask = b.max(axis=1).max(axis=2)
                    #print('New mask shape', mask.shape)
                else:
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('emma/emma_test_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:,d] = maskt
                    print('Mask shape', mask.shape)
                if i == 0:
                    mask = tf.constant(mask)
                    print('Mask shape', [int(d) for d in mask.get_shape()])
                    feats = tf.multiply(feats, mask)

                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                else:
                    # invert the mask for the second style; cast the boolean
                    # result to float so it can multiply the feature tensor
                    mask2 = (mask < 1).astype(np.float32)
                    feats2 = tf.multiply(feats, mask2)
                    gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if i is not None and i % print_iterations == 0 or last:
                    stderr.write('  content loss: %g\n' % content_loss.eval())
                    stderr.write('    style loss: %g\n' % style_loss.eval())
                    stderr.write('       tv loss: %g\n' % tv_loss.eval())
                    stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)

                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)

            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
Example #39
def stylize(
    network,
    initial,
    content,
    style,
    iterations,
    content_weight,
    style_weight,
    tv_weight,
    learning_rate,
    print_iter=None,
):
    from functools import reduce  # Python 3: reduce lives in functools

    shape = (1,) + content.shape
    style_shape = (1,) + style.shape
    content_features = {}
    style_features = {}

    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=style_shape)
        net, _ = vgg.net(network, image)
        style_pre = np.array([vgg.preprocess(style, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 256 / 1000
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype("float32")
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        content_loss = tf.nn.l2_loss(net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
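        # note: this variant averages the per-layer style losses equally
        # rather than applying per-layer weights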
        tv_loss = tf.nn.l2_loss(image[:, 1:, :, :] - image[:, : shape[1] - 1, :, :]) + tf.nn.l2_loss(
            image[:, :, 1:, :] - image[:, :, : shape[2] - 1, :]
        )
        loss = content_weight * content_loss + style_weight * style_loss + tv_weight * tv_loss

        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                if print_iter is not None and i % print_iter == 0:
                    print "  content loss: %g" % (content_loss.eval())
                    print "    style loss: %g" % (style_loss.eval())
                    print "       tv loss: %g" % (tv_loss.eval())
                    print "    total loss: %g" % loss.eval()
                print "Iteration %d/%d" % (i + 1, iterations)
                train_step.run()
            return vgg.unprocess(image.eval().reshape(shape[1:]), mean_pixel)
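
# A hypothetical invocation of this stylize variant; the stand-in images and
# the VGG weights file name are assumptions for illustration.
import numpy as np

content = (np.random.rand(256, 256, 3) * 255).astype(np.float32)  # stand-in
style = (np.random.rand(256, 256, 3) * 255).astype(np.float32)    # stand-in
result = stylize('imagenet-vgg-verydeep-19.mat', None, content, style,
                 iterations=500, content_weight=5e0, style_weight=1e2,
                 tv_weight=1e2, learning_rate=1e1, print_iter=100)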