Example #1
def content_loss(target, prediction, batch_size):
    CONTENT_LAYER = 'relu5_4'
    vgg_dir = '/gdata/huangjie/hdrnet/vgg_pretrained/imagenet-vgg-verydeep-19.mat'
    enhanced_vgg = vgg.net(vgg_dir, vgg.preprocess(prediction * 255))
    dslr_vgg = vgg.net(vgg_dir, vgg.preprocess(target * 255))

    content_size = utils._tensor_size(dslr_vgg[CONTENT_LAYER]) * batch_size
    loss_content = 2 * tf.nn.l2_loss(enhanced_vgg[CONTENT_LAYER] -
                                     dslr_vgg[CONTENT_LAYER]) / content_size
    return tf.reduce_mean(loss_content)

def get_content_features(content_path, content_layers):
    with tf.Graph().as_default() as g:
        image = tf.expand_dims(reader.get_image(content_path, FLAGS.IMAGE_SIZE), 0)
        net, _ = vgg.net(FLAGS.VGG_PATH, image)
        layers = []
        for layer in content_layers:
            layers.append(net[layer])

        with tf.Session() as sess:
            return sess.run(layers + [image])

def get_style_features(style_paths, style_layers):
    with tf.Graph().as_default() as g:
        size = int(round(FLAGS.IMAGE_SIZE * FLAGS.STYLE_SCALE))
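        # tf.pack is the pre-TF-1.0 name of tf.stack (compare Example #4 below)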
        images = tf.pack([reader.get_image(path, size) for path in style_paths])
        net, _ = vgg.net(FLAGS.VGG_PATH, images)
        features = []
        for layer in style_layers:
            features.append(gram(net[layer]))

        with tf.Session() as sess:
            return sess.run(features)
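Several of these snippets call a gram helper that the excerpt never defines. A minimal sketch, assuming the same layout and normalization as the inline Gram-matrix computations in the stylize() examples below (flatten spatial dimensions, correlate channels, divide by the feature count):

def gram(layer):
    # Hypothetical helper, inferred from the inline versions below.
    _, height, width, channels = [d.value for d in layer.get_shape()]
    size = height * width * channels
    feats = tf.reshape(layer, (-1, channels))
    return tf.matmul(tf.transpose(feats), feats) / size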
Example #4
def get_style_features(style_paths, style_layers, net_type):
    with tf.Graph().as_default() as g:
        size = int(round(FLAGS.image_size * FLAGS.style_scale))
        images = tf.stack(
            [reader.get_image(path, size) for path in style_paths])
        net, _ = vgg.net(FLAGS.vgg_path, images, net_type)
        features = []
        for layer in style_layers:
            features.append(model.gram(net[layer], FLAGS.batch_size))

        with tf.Session() as sess:
            return sess.run(features)
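Example #4 passes a batch size to model.gram, so that variant presumably keeps one Gram matrix per image. A sketch, assuming the batched-matmul layout that Example #13 uses inline:

def gram(layer, batch_size):
    # Hypothetical batched variant: one (channels x channels) Gram matrix
    # per image, normalized by the per-image feature count.
    _, height, width, channels = [d.value for d in layer.get_shape()]
    size = height * width * channels
    feats = tf.reshape(layer, (batch_size, height * width, channels))
    feats_T = tf.transpose(feats, perm=[0, 2, 1])
    return tf.matmul(feats_T, feats) / size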
Example #5
def main(argv=None):
    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_path = FLAGS.CONTENT_IMAGE
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    style_features_t = get_style_features(style_paths, style_layers)
    res = get_content_features(content_path, content_layers)
    content_features_t, image_t = res[:-1], res[-1]

    image = tf.constant(image_t)
    random = tf.random_normal(image_t.shape)
    initial = tf.Variable(random if FLAGS.RANDOM_INIT else image)

    net, _ = vgg.net(FLAGS.VGG_PATH, initial)

    content_loss = 0
    for content_features, layer in zip(content_features_t, content_layers):
        layer_size = tf.size(content_features)
        content_loss += tf.nn.l2_loss(
            net[layer] - content_features) / tf.to_float(layer_size)
    content_loss = FLAGS.CONTENT_WEIGHT * content_loss / len(content_layers)

    style_loss = 0
    for style_gram, layer in zip(style_features_t, style_layers):
        layer_size = tf.size(style_gram)
        style_loss += tf.nn.l2_loss(gram(net[layer]) -
                                    style_gram) / tf.to_float(layer_size)
        #style_loss += tf.sqrt(tf.reduce_sum(tf.pow(gram(net[layer]) - style_gram, 2)))
    style_loss = FLAGS.STYLE_WEIGHT * style_loss

    tv_loss = FLAGS.TV_WEIGHT * total_variation_loss(initial)

    total_loss = content_loss + style_loss + tv_loss

    train_op = tf.train.AdamOptimizer(FLAGS.LEARNING_RATE).minimize(total_loss)

    output_image = tf.image.encode_png(
        tf.saturate_cast(tf.squeeze(initial) + reader.mean_pixel, tf.uint8))

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        start_time = time.time()
        for step in range(FLAGS.NUM_ITERATIONS):
            _, loss_t, cl, sl = sess.run(
                [train_op, total_loss, content_loss, style_loss])
            elapsed = time.time() - start_time
            start_time = time.time()
            print(step, elapsed, loss_t, cl, sl)
        image_t = sess.run(output_image)
        with open('out.png', 'wb') as f:
            f.write(image_t)
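main() above relies on a total_variation_loss helper (and, indirectly, a _tensor_size helper) that the excerpt does not include. A minimal sketch, assuming the same TV term that Examples #8-#10 compute inline:

def _tensor_size(tensor):
    # Hypothetical helper: element count of a tensor, batch dim excluded.
    size = 1
    for d in tensor.get_shape()[1:]:
        size *= d.value
    return size

def total_variation_loss(image):
    # Hypothetical helper matching the inline TV term used below: L2 norm of
    # vertical and horizontal neighbor differences, each normalized by the
    # size of the shifted tensor.
    shape = [d.value for d in image.get_shape()]
    tv_y_size = _tensor_size(image[:, 1:, :, :])
    tv_x_size = _tensor_size(image[:, :, 1:, :])
    return 2 * (
        tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :]) / tv_y_size +
        tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :]) / tv_x_size)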
Example #7
def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            grammatrix = np.matmul(features.T, features)
            style_features[layer] = grammatrix

    g = tf.Graph()
    with g.as_default():
        global_step = tf.Variable(0, trainable=False)
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        content_pre = vgg.preprocess(content_image, mean_pixel)
        init = content_pre * (1 - NOISE_RATIO) + noise * NOISE_RATIO
        init = init.astype('float32')
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats)

            style_gram = style_features[i]

            style_losses.append(tf.nn.l2_loss(gram - style_gram) /
                    (4.0 * number ** 2 * (height * width) ** 2))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        loss = ALPHA * content_loss + BETA * style_loss

        learning_rate = tf.train.exponential_decay(LEARNING_RATE_INITIAL,
                global_step, LEARNING_DECAY_STEPS, LEARNING_DECAY_BASE,
                staircase=True)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss,
                global_step=global_step)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(100000):
                print('i = %d' % i)
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()
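main() above leans on module-level constants that the excerpt omits. Plausible placeholders in the spirit of Gatys et al. (every value here is an assumption, not taken from the source):

VGG_PATH = 'imagenet-vgg-verydeep-19.mat'  # hypothetical path
CONTENT_LAYER = 'relu4_2'                  # common content-layer choice
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
NOISE_RATIO = 0.6
ALPHA = 1.0     # content weight
BETA = 200.0    # style weight
LEARNING_RATE_INITIAL = 10.0
LEARNING_DECAY_STEPS = 100
LEARNING_DECAY_BASE = 0.94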
Example #8
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:', features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                #mask = np.zeros_like(features)
                #mask[:49664/2, :] = 1
                #print 'Mask shape', mask.shape
                print('Final features shape', features.shape)
                #features = features*mask
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape:', gram.shape)
                style_features[i][layer] = gram

    #sys.exit()
    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print('Height, width, number', height, width, number)
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                
                #print tf.shape(feats).as_list()
                print('Height', height)
                print('Width', width)
                print('Number', number)
                print('Style features shape', style_features[i][style_layer].shape)
                print(style_layer)
                
                if style_layer == 'relu2_1':
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    temp = imread('emma/emma_test_mask.jpg').astype(np.float32)
                    c = temp.reshape(height,2,width,2)
                    temp = c.max(axis=1).max(axis=2)
                    print(temp.shape)
                    maskt = np.reshape(temp, (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:,d] = maskt
                    print('Mask shape', mask.shape)
                    #b = mask.reshape(height*width*2, 2, number/2,2)
                    #mask = b.max(axis=1).max(axis=2)
                    #print 'New mask shape', mask.shape
                else:
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('emma/emma_test_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:,d] = maskt
                    print('Mask shape', mask.shape)
                if i == 0:
                    mask = tf.constant(mask)
                    print('Mask shape', [d.value for d in mask.get_shape()])
                    feats = tf.mul(feats,mask)

                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                else:
                    mask2 = (mask < 1).astype(np.float32)  # cast boolean mask so tf.mul gets float32
                    feats2 = tf.mul(feats, mask2)
                    gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if i is not None and i % print_iterations == 0 or last:
                    print('  content loss: %g' % content_loss.eval(), file=stderr)
                    print('    style loss: %g' % style_loss.eval(), file=stderr)
                    print('       tv loss: %g' % tv_loss.eval(), file=stderr)
                    print('    total loss: %g' % loss.eval(), file=stderr)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                print('Iteration %d/%d' % (i + 1, iterations), file=stderr)
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)

                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)

            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
Example #9
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None,
        print_image_iterations=False):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if i is not None and i % print_iterations == 0 or last:
                    print('  content loss: %g' % content_loss.eval(), file=stderr)
                    print('    style loss: %g' % style_loss.eval(), file=stderr)
                    print('       tv loss: %g' % tv_loss.eval(), file=stderr)
                    print('    total loss: %g' % loss.eval(), file=stderr)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                print('Iteration %d/%d' % (i + 1, iterations), file=stderr)
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)
                if (i % 100 == 0) and print_image_iterations and best is not None:
                    temp_image = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    temp_output = 'iteration_' + str(i) + '.jpg'
                    imsave(temp_output, temp_image)
            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
Example #10
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        def print_progress(i, last=False):
            global timenow
            stderr.write('Iteration %d/%d, time: %dms\n' % (i + 1, iterations, current_milli_time() - timenow))
            timenow = current_milli_time()
            if last or (print_iterations and i % print_iterations == 0):
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i, last=last_step)
                train_step.run()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield (
                        (None if last_step else i),
                        vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    )
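stylize() above clocks each iteration with a current_milli_time() helper and a module-level timenow, neither of which is part of the excerpt. A plausible definition (an assumption, not taken from the source):

current_milli_time = lambda: int(round(time.time() * 1000))  # hypothetical helper
timenow = current_milli_time()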

def main(argv=None):
    run_id = FLAGS.NAME if FLAGS.NAME else str(uuid.uuid4())
    model_path = '%s/%s' % (FLAGS.MODEL_PATH, run_id)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    summary_path = '%s/%s' % (FLAGS.SUMMARY_PATH, run_id)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)

    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    style_features_t = get_style_features(style_paths, style_layers)

    images = reader.image(FLAGS.BATCH_SIZE, FLAGS.IMAGE_SIZE, FLAGS.TRAIN_IMAGES_PATH)
    generated = model.net(images - reader.mean_pixel, training=True)

    # Put both generated and training images in same batch through VGG net for efficiency
    net, _ = vgg.net(FLAGS.VGG_PATH, tf.concat(0, [generated, images]) - reader.mean_pixel)

    content_loss = 0
    for layer in content_layers:
        generated_images, content_images = tf.split(0, 2, net[layer])
        size = tf.size(generated_images)
        shape = tf.shape(generated_images)
        width = shape[1]
        height = shape[2]
        num_filters = shape[3]
        content_loss += tf.nn.l2_loss(generated_images - content_images) / tf.to_float(size)

    style_loss = 0
    for style_grams, layer in zip(style_features_t, style_layers):
        generated_images, _ = tf.split(0, 2, net[layer])
        size = tf.size(generated_images)
        for style_gram in style_grams:
            style_loss += tf.nn.l2_loss(gram(generated_images) - style_gram) / tf.to_float(size)
    style_loss = style_loss / len(style_paths)

    tv_loss = total_variation_loss(generated)

    loss = FLAGS.STYLE_WEIGHT * style_loss + FLAGS.CONTENT_WEIGHT * content_loss + FLAGS.TV_WEIGHT * tv_loss

    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step)

    # Statistics
    with tf.name_scope('losses'):
        tf.scalar_summary('content loss', content_loss)
        tf.scalar_summary('style loss', style_loss)
        tf.scalar_summary('regularizer loss', tv_loss)
    with tf.name_scope('weighted_losses'):
        tf.scalar_summary('weighted content loss', content_loss * FLAGS.CONTENT_WEIGHT)
        tf.scalar_summary('weighted style loss', style_loss * FLAGS.STYLE_WEIGHT)
        tf.scalar_summary('weighted regularizer loss', tv_loss * FLAGS.TV_WEIGHT)
        tf.scalar_summary('total loss', loss)
    tf.image_summary('original', images)
    tf.image_summary('generated', generated)

    summary = tf.merge_all_summaries()

    with tf.Session() as sess:
        writer = tf.train.SummaryWriter(summary_path, sess.graph)

        saver = tf.train.Saver(tf.all_variables())
        file = tf.train.latest_checkpoint(model_path)
        sess.run([tf.initialize_all_variables(), tf.initialize_local_variables()])
        if file:
            print('Restoring model from {}'.format(file))
            saver.restore(sess, file)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                elapsed_time = time.time() - start_time
                start_time = time.time()
                if step % 100 == 0:
                    print(step, loss_t, elapsed_time)
                    summary_str = sess.run(summary)
                    writer.add_summary(summary_str, step)
                if step % 10000 == 0:
                    saver.save(sess, model_path + '/fast-style-model', global_step=step)
        except tf.errors.OutOfRangeError:
            saver.save(sess, model_path + '/fast-style-model-done')
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
        coord.join(threads)

def main(argv=None):
    if FLAGS.CONTENT_IMAGES_PATH:
        content_images = reader.image(
                FLAGS.BATCH_SIZE,
                FLAGS.IMAGE_SIZE,
                FLAGS.CONTENT_IMAGES_PATH,
                epochs=1,
                shuffle=False,
                crop=False)
        generated_images = model.net(content_images / 255.)

        output_format = tf.saturate_cast(generated_images + reader.mean_pixel, tf.uint8)
        with tf.Session() as sess:
            file = tf.train.latest_checkpoint(FLAGS.MODEL_PATH)
            if not file:
                print('Could not find trained model in {}'.format(FLAGS.MODEL_PATH))
                return
            print('Using model from {}'.format(file))
            saver = tf.train.Saver()
            saver.restore(sess, file)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            i = 0
            start_time = time.time()
            try:
                while not coord.should_stop():
                    print(i)
                    images_t = sess.run(output_format)
                    elapsed = time.time() - start_time
                    start_time = time.time()
                    print('Time for one batch: {}'.format(elapsed))

                    for raw_image in images_t:
                        i += 1
                        misc.imsave('out{0:04d}.png'.format(i), raw_image)
            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()

            coord.join(threads)
        return

    if not os.path.exists(FLAGS.MODEL_PATH):
        os.makedirs(FLAGS.MODEL_PATH)

    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    style_features_t = get_style_features(style_paths, style_layers)

    images = reader.image(FLAGS.BATCH_SIZE, FLAGS.IMAGE_SIZE, FLAGS.TRAIN_IMAGES_PATH)
    generated = model.net(images / 255.)

    net, _ = vgg.net(FLAGS.VGG_PATH, tf.concat(0, [generated, images]))

    content_loss = 0
    for layer in content_layers:
        generated_images, content_images = tf.split(0, 2, net[layer])
        size = tf.size(generated_images)
        content_loss += tf.nn.l2_loss(generated_images - content_images) / tf.to_float(size)
    content_loss = content_loss / len(content_layers)

    style_loss = 0
    for style_gram, layer in zip(style_features_t, style_layers):
        generated_images, _ = tf.split(0, 2, net[layer])
        size = tf.size(generated_images)
        for style_image in style_gram:
            style_loss += tf.nn.l2_loss(tf.reduce_sum(gram(generated_images) - style_image, 0)) / tf.to_float(size)
    style_loss = style_loss / len(style_layers)

    loss = FLAGS.STYLE_WEIGHT * style_loss + FLAGS.CONTENT_WEIGHT * content_loss + FLAGS.TV_WEIGHT * total_variation_loss(generated)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step)

    output_format = tf.saturate_cast(tf.concat(0, [generated, images]) + reader.mean_pixel, tf.uint8)

    with tf.Session() as sess:
        saver = tf.train.Saver(tf.all_variables())
        file = tf.train.latest_checkpoint(FLAGS.MODEL_PATH)
        if file:
            print('Restoring model from {}'.format(file))
            saver.restore(sess, file)
        else:
            print('New model initialized')
            sess.run(tf.initialize_all_variables())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                elapsed_time = time.time() - start_time
                start_time = time.time()
                if step % 100 == 0:
                    print(step, loss_t, elapsed_time)
                    output_t = sess.run(output_format)
                    for i, raw_image in enumerate(output_t):
                        misc.imsave('out{}.png'.format(i), raw_image)
                if step % 10000 == 0:
                    saver.save(sess, FLAGS.MODEL_PATH + '/fast-style-model', global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()

        coord.join(threads)
Example #13
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod] 

    style_features = {}

    batch_shape = (batch_size,256,256,3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32, shape=style_shape, name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image:style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape, name="X_content")
        X_pre = vgg.preprocess(X_content)

        # precompute content features
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content/255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER])*batch_size
        assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = map(lambda i:i.value,layer.get_shape())
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0,2,1])
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(2 * tf.nn.l2_loss(grams - style_gram)/style_gram.size)

        style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

        # total variation denoising
        tv_y_size = _tensor_size(preds[:,1:,:,:])
        tv_x_size = _tensor_size(preds[:,:,1:,:])
        y_tv = tf.nn.l2_loss(preds[:,1:,:,:] - preds[:,:batch_shape[1]-1,:,:])
        x_tv = tf.nn.l2_loss(preds[:,:,1:,:] - preds[:,:,:batch_shape[2]-1,:])
        tv_loss = tv_weight*2*(x_tv/tv_x_size + y_tv/tv_y_size)/batch_size

        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())
        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)
        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256,256,3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {
                   X_content:X_batch
                }

                train_step.run(feed_dict=feed_dict)
                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                       X_content:X_batch
                    }

                    tup = sess.run(to_get, feed_dict = test_feed_dict)
                    _style_loss,_content_loss,_tv_loss,_loss,_preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                       _preds = vgg.unprocess(_preds)
                    else:
                       saver = tf.train.Saver()
                       res = saver.save(sess, save_path)
                    yield(_preds, losses, iterations, epoch)
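optimize() fills each batch through a get_img helper that the excerpt omits. A minimal sketch of the assumed behavior (read as RGB, optionally resize), using the scipy.misc API common in code of this era:

import scipy.misc

def get_img(src, img_size=False):
    # Hypothetical helper: load an image as RGB and optionally resize it.
    img = scipy.misc.imread(src, mode='RGB')
    if img_size:
        img = scipy.misc.imresize(img, img_size)
    return img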
Example #14
def stylize(
    network,
    initial,
    content,
    style,
    iterations,
    content_weight,
    style_weight,
    tv_weight,
    learning_rate,
    print_iter=None,
):
    shape = (1,) + content.shape
    style_shape = (1,) + style.shape
    content_features = {}
    style_features = {}

    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=style_shape)
        net, _ = vgg.net(network, image)
        style_pre = np.array([vgg.preprocess(style, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 256 / 1000
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype("float32")
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        content_loss = tf.nn.l2_loss(net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        tv_loss = tf.nn.l2_loss(image[:, 1:, :, :] - image[:, : shape[1] - 1, :, :]) + tf.nn.l2_loss(
            image[:, :, 1:, :] - image[:, :, : shape[2] - 1, :]
        )
        loss = content_weight * content_loss + style_weight * style_loss + tv_weight * tv_loss

        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                if print_iter is not None and i % print_iter == 0:
                    print "  content loss: %g" % (content_loss.eval())
                    print "    style loss: %g" % (style_loss.eval())
                    print "       tv loss: %g" % (tv_loss.eval())
                    print "    total loss: %g" % loss.eval()
                print "Iteration %d/%d" % (i + 1, iterations)
                train_step.run()
            return vgg.unprocess(image.eval().reshape(shape[1:]), mean_pixel)
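vgg.preprocess and vgg.unprocess appear throughout these examples. In the neural-style reference implementation this code appears to follow, they are plain mean-pixel shifts; a sketch under that assumption:

def preprocess(image, mean_pixel):
    # Shift inputs by the VGG training-set mean pixel (assumed behavior).
    return image - mean_pixel

def unprocess(image, mean_pixel):
    # Invert preprocess() to recover displayable pixel values.
    return image + mean_pixel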
Example #15
# Network parameters
VGG_19_PATH = 'models/imagenet-vgg-verydeep-19.mat'
LEARNING_RATE = 10
POOLING_FUNCTION = 'MAX'

# Load images
content_image = utils.read_image(CONTENT_PATH)
style_image = utils.read_image(STYLE_PATH)

g = tf.Graph()
with g.device(DEVICE), g.as_default(), tf.Session() as sess:
    # 1. Compute content representation
    print("1. Computing content representation...")
    content_shape = (1,) + content_image.shape  # add batch size dimension
    x = tf.placeholder(tf.float32, content_shape)
    net, activations, img_mean = vgg.net(VGG_19_PATH, x, pooling_function=POOLING_FUNCTION)

    # Pre-process image
    content_image_pp = utils.preprocess_image(content_image, img_mean)

    content_representation = activations[CONTENT_LAYER].eval(feed_dict={x: np.array([content_image_pp])})

    # 2. Compute style Gram matrices
    print("2. Computing style Gram matrices...")
    style_shape = (1,) + style_image.shape  # add batch size dimension
    x = tf.placeholder(tf.float32, style_shape)
    net, activations, _ = vgg.net(VGG_19_PATH, x, pooling_function=POOLING_FUNCTION)

    # Pre-process image
    style_image_pp = utils.preprocess_image(style_image, img_mean)
Example #16
def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    with tf.Graph().as_default():
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        init = tf.random_normal(shape) * 256 / 1000
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[i]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        tv_loss = (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) +
                tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]))
        loss = ALPHA * content_loss + BETA * style_loss + TV_WEIGHT * tv_loss

        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(100000):
                print('i = %d' % i)
                if i % 10 == 0:
                    print('\tcontent_loss = %15.0f' % content_loss.eval())
                    print('\tstyle_loss   = %15.0f' % style_loss.eval())
                    print('\ttv_loss      = %15.0f' % tv_loss.eval())
                    print('\tloss         = %15.0f' % loss.eval())
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()

def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:', features.shape)
                features = np.reshape(features, (-1, features.shape[3]))
                #mask = np.zeros_like(features)
                #mask[:49664/2, :] = 1
                #print 'Mask shape', mask.shape
                print('Final features shape', features.shape)
                #features = features*mask
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape:', gram.shape)
                style_features[i][layer] = gram

    #sys.exit()
    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print('Height, width, number', height, width, number)
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                
                #print tf.shape(feats).as_list()

                if normal_flag == 0:
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('bottle_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in range(number):
                        mask[:,d] = maskt
                    print('Mask shape', mask.shape)
                #print sum(sum(mask == 1)) + sum(sum(mask == 0))
                #mask[:height*width/2, :] = 1
                    if i == 0:
                        mask = tf.constant(mask)
                        feats = tf.mul(feats,mask)

                        gram = tf.matmul(tf.transpose(feats), feats) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                    else:
                        mask2 = (mask < 1).astype(np.float32)  # cast boolean mask so tf.mul gets float32
                        feats2 = tf.mul(feats, mask2)
                        gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                        style_gram = style_features[i][style_layer]
                        style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)

                else:
                    feats2 = feats
                    gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)

            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        if normal_flag != 0:
            print "general mask :"
            mask = np.zeros((height*width, number), dtype=np.float32)
            maskt = np.reshape(imread('bottle_mask.jpg').astype(np.float32), (height*width,))
            maskt = maskt > 100
            # for d in xrange(3):
            #     mask[:,d] = maskt
            print('Mask shape', maskt.shape)
            maskt = maskt.reshape((height,width))

            maskt = np.array([maskt,maskt,maskt])
            maskt = maskt.transpose((1,2,0))
            mask = tf.constant(maskt, dtype=tf.float32)
            # feats = tf.mul(feats,mask)

        def capper(a, b, mask):
            # gradient has the image's shape, e.g. (1, 468, 304, 3)
            print("orig shape", a)
            reshaped_in_grad = tf.reshape(a, [-1])
            print("reshaped grad", reshaped_in_grad)

            print("mask", mask)
            g = tf.mul(a, mask)
            # g = tf.reshape(g, (1,height,width,3))
            # print(a, b)
            # print(g)
            return g, b


        # optimizer setup
        # train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        #         # Create an optimizer.
        train_step = tf.train.GradientDescentOptimizer(learning_rate)
         # # Compute the gradients for a list of variables.
        grads_and_vars = train_step.compute_gradients(loss)
        # # grads_and_vars is a list of tuples (gradient, variable).  Do whatever you
        # # need to the 'gradient' part, for example cap them, etc.
        capped_grads_and_vars = [(capper(gv[0], gv[1], mask)) for gv in grads_and_vars]
        # # Ask the optimizer to apply the capped gradients.
        train_step = train_step.apply_gradients(capped_grads_and_vars)

        # opt_op = opt.minimize(cost, var_list=<list of variables>)

        def print_progress(i, last=False):
            if print_iterations is not None:
                if i is not None and i % print_iterations == 0 or last:
                    print('  content loss: %g' % content_loss.eval(), file=stderr)
                    print('    style loss: %g' % style_loss.eval(), file=stderr)
                    print('       tv loss: %g' % tv_loss.eval(), file=stderr)
                    print('    total loss: %g' % loss.eval(), file=stderr)

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                print('Iteration %d/%d' % (i + 1, iterations), file=stderr)

                train_step.run()

                # print "runningstep: ",i, running_step
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or i == iterations - 1:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)

                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)

            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)