Code example #1
import inspect
import time

import numpy as np
import skimage.io
import tensorflow as tf

import custom_vgg19
import stylenet_core
# note: get_filename is a small helper defined elsewhere in this project file


def render(content_file, style_file,
           content_region_file=None, style_region_file=None,
           random_init=False, load_saved_mapping=True, load_trained_image=False, blur_mapping=True,
           height=None, width=None,
           content_ratio=0., style3_ratio=3., style4_ratio=1., gram_ratio=0.001, diff_ratio=0.,
           epochs=300, output_file="./train/output%d.jpg"):
    """
    Render the synthesis in a single generation pass.
    - Best used when the style image is highly similar to the content image
    - If any ratio is set to 0, the corresponding Tensor will not be generated
    - Pure Gram-matrix synthesis works best for abstract painting styles (gram_ratio = 1, all others 0)

    :param content_file:            String file path of content image
    :param style_file:              String file path of style image
    :param content_region_file:     String file path of region mapping of content
    :param style_region_file:       String file path of region mapping of style
    :param random_init:             True to initialize the image with random noise
    :param load_saved_mapping:      True to use saved mapping file
    :param load_trained_image:      True to restore a saved training checkpoint (currently disabled; see the commented saver code below)
    :param blur_mapping:            True to blur the mapping before taking the argmax
    :param height:                  int of height of result image
    :param width:                   int of width of result image. Leaving width as None while height
                                    is set scales it to preserve the aspect ratio
    :param content_ratio:           float32 of weight of content cost
    :param style3_ratio:            float32 of weight of patch cost of conv3 layer
    :param style4_ratio:            float32 of weight of patch cost of conv4 layer
    :param gram_ratio:              float32 of weight of gram matrix cost
    :param diff_ratio:              float32 of weight of the local difference (smoothness) cost
    :param epochs:                  int of number of epochs to train
    :param output_file:             String file name of output file. %d will be replaced with the running step number
    """
    print("render started:")

    # print info:
    frame = inspect.currentframe()
    args, _, _, values = inspect.getargvalues(frame)
    for i in args:
        print("    %s = %s" % (i, values[i]))

    content_np = stylenet_core.load_image(content_file, height, width)
    style_np = stylenet_core.load_image(style_file, content_np.shape[0], content_np.shape[1])

    content_batch = np.expand_dims(content_np, 0)
    style_batch = np.expand_dims(style_np, 0)

    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) as sess:

    tf_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        start_time = time.time()

        contents = tf.constant(content_batch, dtype=tf.float32, shape=content_batch.shape)
        styles = tf.constant(style_batch, dtype=tf.float32, shape=style_batch.shape)

        if random_init:
            var_image = tf.Variable(tf.truncated_normal(content_batch.shape, 0.5, 0.1))
        else:
            var_image = tf.Variable(contents)

        vgg_content = custom_vgg19.Vgg19()
        with tf.name_scope("content_vgg"):
            vgg_content.build(contents)

        vgg_style = custom_vgg19.Vgg19()
        with tf.name_scope("style_vgg"):
            vgg_style.build(styles)

        vgg_var = custom_vgg19.Vgg19()
        with tf.name_scope("variable_vgg"):
            vgg_var.build(var_image)

        with tf.name_scope("cost"):
            # style:
            # TODO change file name based on out file name
            style3file = "./train/%s-style_map_3" % (
                get_filename(content_file) + "-" + get_filename(style_file))
            style4file = "./train/%s-style_map_4" % (
                get_filename(content_file) + "-" + get_filename(style_file))

            if content_region_file is None or style_region_file is None:
                if style3_ratio == 0:  # "==" rather than "is": identity checks fail for float 0.0
                    style_cost_3 = tf.constant(0.0)
                else:
                    style_cost_3 = stylenet_core.get_style_cost_patch2(sess, vgg_var.conv3_1,
                                                                       vgg_content.conv3_1,
                                                                       vgg_style.conv3_1,
                                                                       style3file,
                                                                       load_saved_mapping=load_saved_mapping)
                if style4_ratio == 0:
                    style_cost_4 = tf.constant(0.0)
                else:
                    style_cost_4 = stylenet_core.get_style_cost_patch2(sess, vgg_var.conv4_1,
                                                                       vgg_content.conv4_1,
                                                                       vgg_style.conv4_1,
                                                                       style4file,
                                                                       load_saved_mapping=load_saved_mapping)
            else:
                content_regions_np = stylenet_core.load_image(content_region_file, content_np.shape[0],
                                                              content_np.shape[1])
                style_regions_np = stylenet_core.load_image(style_region_file, content_np.shape[0],
                                                            content_np.shape[1])
                content_regions_batch = np.expand_dims(content_regions_np, 0)
                style_regions_batch = np.expand_dims(style_regions_np, 0)
                content_regions = tf.constant(content_regions_batch, dtype=tf.float32,
                                              shape=content_regions_batch.shape)
                style_regions = tf.constant(style_regions_batch, dtype=tf.float32,
                                            shape=style_regions_batch.shape)

                # downsample the region masks twice so they match conv3_1's
                # spatial resolution (VGG19 has pooled twice by that layer)
                content_regions = vgg_var.avg_pool(content_regions, None)
                content_regions = vgg_var.avg_pool(content_regions, None)
                style_regions = vgg_var.avg_pool(style_regions, None)
                style_regions = vgg_var.avg_pool(style_regions, None)

                if style3_ratio == 0:
                    style_cost_3 = tf.constant(0.0)
                else:
                    style_cost_3 = stylenet_core.get_style_cost_patch2(sess,
                                                                       vgg_var.conv3_1,
                                                                       vgg_content.conv3_1,
                                                                       vgg_style.conv3_1,
                                                                       style3file,
                                                                       content_regions,
                                                                       style_regions,
                                                                       load_saved_mapping,
                                                                       blur_mapping=blur_mapping)

                # one more pooling step brings the masks down to conv4_1's resolution
                content_regions = vgg_var.avg_pool(content_regions, None)
                style_regions = vgg_var.avg_pool(style_regions, None)

                if style4_ratio == 0:
                    style_cost_4 = tf.constant(0.0)
                else:
                    style_cost_4 = stylenet_core.get_style_cost_patch2(sess,
                                                                       vgg_var.conv4_1,
                                                                       vgg_content.conv4_1,
                                                                       vgg_style.conv4_1,
                                                                       style4file,
                                                                       content_regions,
                                                                       style_regions,
                                                                       load_saved_mapping,
                                                                       blur_mapping=blur_mapping)

            if gram_ratio == 0:
                style_cost_gram = tf.constant(0.0)
            else:
                style_cost_gram = stylenet_core.get_style_cost_gram(sess, vgg_style, vgg_var)

            # content:
            if content_ratio == 0:
                content_cost = tf.constant(.0)
            else:
                fixed_content = stylenet_core.get_constant(sess, vgg_content.conv4_2)
                content_cost = stylenet_core.l2_norm_cost(vgg_var.conv4_2 - fixed_content)

            # smoothness:
            if diff_ratio == 0:
                diff_cost = tf.constant(.0)
            else:
                # 3x3 finite-difference kernels: -1 at the center, +1 to the
                # right (horizontal) or below (vertical), replicated across
                # the 3 input channels and stacked as 2 output channels
                diff_filter_h = tf.constant([0, 0, 0, 0, -1, 1, 0, 0, 0], tf.float32, [3, 3, 1, 1])
                diff_filter_h = tf.concat([diff_filter_h, diff_filter_h, diff_filter_h], 2)
                diff_filter_v = tf.constant([0, 0, 0, 0, -1, 0, 0, 1, 0], tf.float32, [3, 3, 1, 1])
                diff_filter_v = tf.concat([diff_filter_v, diff_filter_v, diff_filter_v], 2)
                diff_filter = tf.concat([diff_filter_h, diff_filter_v], 3)
                filtered_input = tf.nn.conv2d(var_image, diff_filter, [1, 1, 1, 1], "VALID")
                diff_cost = stylenet_core.l2_norm_cost(filtered_input) * 1e7

            content_cost = content_cost * content_ratio
            style_cost_3 = style_cost_3 * style3_ratio
            style_cost_4 = style_cost_4 * style4_ratio
            style_cost_gram = style_cost_gram * gram_ratio
            diff_cost = diff_cost * diff_ratio
            cost = content_cost + style_cost_3 + style_cost_4 + style_cost_gram + diff_cost

        with tf.name_scope("train"):
            global_step = tf.Variable(0, name='global_step', trainable=False)

            optimizer = tf.train.AdamOptimizer(learning_rate=0.02)
            gvs = optimizer.compute_gradients(cost)

            training = optimizer.apply_gradients(gvs, global_step=global_step)

        print("Net generated:", (time.time() - start_time))
        start_time = time.time()

        with tf.name_scope("image_out"):
            image_out = tf.clip_by_value(tf.squeeze(var_image, [0]), 0, 1)

        # saver = tf.train.Saver(max_to_keep=1)

        # checkpoint = tf.train.get_checkpoint_state("./train")
        # if checkpoint and checkpoint.model_checkpoint_path and load_trained_image:
        #     saver.restore(sess, checkpoint.model_checkpoint_path)
        #     print("save restored:", checkpoint.model_checkpoint_path)
        # else:
        sess.run(tf.global_variables_initializer())
        print("all variables init")

        print("Var init: %d" % (time.time() - start_time))

        step_out = 0
        start_time = time.time()
        for i in range(epochs):
            if i % 5 == 0:
                img = sess.run(image_out)
                img_out_path = output_file % step_out
                skimage.io.imsave(img_out_path, img)
                print("img saved: ", img_out_path)

            (step_out, content_out, style_patch3_out, style_patch4_out,
             style_gram_out, diff_cost_out, cost_out, _) = sess.run(
                [global_step, content_cost, style_cost_3, style_cost_4,
                 style_cost_gram, diff_cost, cost, training])

            duration = time.time() - start_time
            print("Step %d: cost:%.10f\t(%.1f sec)" % (step_out, cost_out, duration), \
                "\t content:%.5f, style_3:%.5f, style_4:%.5f, gram:%.5f, diff_cost_out:%.5f" \
                % (content_out, style_patch3_out, style_patch4_out, style_gram_out, diff_cost_out))

            # if (i + 1) % 10 == 0:
            #     saved_path = saver.save(sess, "./train/saves-" + get_filename(content_file),
            #                             global_step=global_step)
            #     print("net saved: ", saved_path)

        img = sess.run(image_out)
        img_out_path = output_file % step_out
        skimage.io.imsave(img_out_path, img)
        print("img saved: ", img_out_path)
Code example #2
    # as_grey=True already returns floats in [0, 1]; dividing by 255 again would double-scale
    img = skimage.io.imread("tex1-b.png", as_grey=True)
    # img = np.array([[0.4,0.6,0.1,0.2],[0.1,0.5,0.5,0],[1,1,0.4,0.3],[0.2,0.2,0.5,0.8]])
    timg = tf.convert_to_tensor(img, dtype=tf.float32)
    # timg = tf.expand_dims(timg, 2)
    # print(timg.shape)

    gen_sm = twopoint_correlation_layer(noise)
    sm = twopoint_correlation_layer(timg[:height, :width])

    tpc_loss = l2_loss(gen_sm - sm)
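
    # l2_loss is not shown in this excerpt; a minimal sketch of what it
    # presumably computes (a plain sum-of-squares penalty):
    #   def l2_loss(x):
    #       return tf.reduce_sum(tf.square(x))  # tf.nn.l2_loss(x) would halve this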

    # compute style loss

    texture_img, texture_shape = load_image("tex3.png")
    # h,w,c
    texture_model = vgg19.Vgg19()
    texture_model.build(texture_img, texture_shape[1:])

    rgbnoise = tf.expand_dims(noise, 2)
    rgbnoise = tf.image.grayscale_to_rgb(rgbnoise)
    rgbnoise = tf.expand_dims(rgbnoise, 0)
    # print(rgbnoise.shape)
    x_model = vgg19.Vgg19()
    x_model.build(rgbnoise, rgbnoise.shape.as_list()[1:])

    style_loss = get_texture_loss(x_model, texture_model)

    total_loss = Alpha * tpc_loss + Beta * style_loss

    optimizer = tf.train.AdamOptimizer(0.02).minimize(total_loss,
                                                      var_list=noise)
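
get_texture_loss is not shown in the excerpt above. A minimal sketch of the Gram-matrix texture loss it presumably implements (the layer list and equal weighting are assumptions; the layer attributes follow the Vgg19 class used above):

def gram_matrix(features):
    # features: [1, H, W, C] activation map from one VGG layer
    shape = tf.shape(features)
    h, w, c = shape[1], shape[2], shape[3]
    f = tf.reshape(features, [-1, c])          # flatten spatial dims -> [H*W, C]
    gram = tf.matmul(f, f, transpose_a=True)   # [C, C] channel co-activation matrix
    return gram / tf.cast(h * w * c, tf.float32)

def get_texture_loss(x_model, texture_model):
    # sum of squared Gram differences over a few style layers
    loss = tf.constant(0.0)
    for layer in ["conv1_1", "conv2_1", "conv3_1", "conv4_1"]:
        x = getattr(x_model, layer)
        t = getattr(texture_model, layer)
        loss += tf.reduce_sum(tf.square(gram_matrix(x) - gram_matrix(t)))
    return loss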
Code example #3
File: test.py  Project: jainszhang/LearnDM
                        help="path to where the styled image will be created")
    args = parser.parse_args()

    # Assign image paths from the arg parsing
    INPUT_PATH = os.path.realpath(args.input)
    STYLE_PATH = os.path.realpath(args.style)
    OUT_PATH = os.path.realpath(args.out)


with tf.Session() as sess:
    parse_args()  # parse the image-path arguments

    photo, image_shape = utils.load_image(INPUT_PATH)  #load image
    image_shape = [1] + image_shape
    photo = photo.reshape(image_shape).astype(np.float32)

    art = utils.load_image2(STYLE_PATH,
                            height=image_shape[1],
                            width=image_shape[2])
    art = art.reshape(image_shape).astype(np.float32)

    # Initialize the variable image that will become our final output as random noise
    noise = tf.Variable(tf.truncated_normal(image_shape, mean=.5, stddev=.1))

    # VGG Networks Init
    with tf.name_scope('vgg_content'):
        content_model = vgg19.Vgg19()
        content_model.build(photo, image_shape[1:])

tf.placeholder(shape=None, dtype=tf.float32)  # np.float is a deprecated alias; tf.float32 is the conventional dtype
Code example #4
    # Initialize and process photo image to be used for our content
    photo, image_shape = utils.load_image('../lib/images/content/2.jpg')
    image_shape = [1] + image_shape
    photo = photo.reshape(image_shape).astype(np.float32)

    # Initialize and process art image to be used for our style
    art = utils.load_image2('../lib/images/style/starry-night.jpg', height=image_shape[1], width=image_shape[2])
    art = art.reshape(image_shape).astype(np.float32)

    # Initialize the variable image that will become our final output as random noise
    noise = tf.Variable(tf.truncated_normal(image_shape, mean=.5, stddev=.1))

    # VGG Networks Init
    with tf.name_scope('vgg_content'):
        content_model = vgg19.Vgg19()
        content_model.build(photo, image_shape[1:])

    with tf.name_scope('vgg_style'):
        style_model = vgg19.Vgg19()
        style_model.build(art, image_shape[1:])

    with tf.name_scope('vgg_x'):
        x_model = vgg19.Vgg19()
        x_model.build(noise, image_shape[1:])

    # Loss functions
    with tf.name_scope('loss'):
        # Content
        if CONTENT_WEIGHT == 0:  # "==" rather than "is": identity checks fail for float 0.0
            content_loss = tf.constant(0.)
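
The excerpt cuts off inside the content-loss branch. A plausible continuation, mirroring the conv4_2 content cost used in example #1 (the exact layer and normalization here are assumptions, not the project's confirmed code):

        else:
            # match the noise image's conv4_2 activations to the photo's,
            # in the style of Gatys et al.'s content loss
            content_loss = CONTENT_WEIGHT * tf.reduce_mean(
                tf.square(x_model.conv4_2 - content_model.conv4_2))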
Code example #5
File: trainer.py  Project: mainyaa/pixai
import math
import time

import skimage.io
import tensorflow as tf

import custom_vgg19
# TensorZoomNet, Discriminator, get_invariant_cost2, get_next_batch and the
# upper-case constants (SIZE, TRAIN_DIR, ...) come from elsewhere in this project


def train(ds, dis_learning_rate, gen_learning_rate):
    with tf.Session() as sess:
        start_time = time.time()

        in_train_gen = tf.placeholder(tf.bool)
        in_train_dis = tf.placeholder(tf.bool)
        in_large = tf.placeholder(tf.float32, [1, SIZE, SIZE, 3])
        in_small = tf.placeholder(tf.float32, [1, SIZE, SIZE, 3])
        """
        # extra difficulty: blur the large image:
        blur_filter = tf.constant(1, shape=[5, 5, 1, 1], dtype=tf.float32) / 25
        blur_filter = tf.tile(blur_filter, [1, 1, 3, 1])
        in_large_blur = tf.nn.depthwise_conv2d(in_large, blur_filter, strides=[1, 1, 1, 1], padding='SAME')

        # reduce the size to smaller
        in_small = tf.nn.avg_pool(in_large_blur, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

        # use stitch training method, slice the image into tiles and concat as batches
        t = create_tiles(in_small, SIZE / 4, SIZE / 4, 4)
        in_stitch = tf.concat(0, [tf.concat(0, t[y]) for y in xrange(4)])  # row1, row2, ...
        """

        generator = TensorZoomNet(trainable=True, npy_path=GEN_NPY)
        with tf.name_scope("generator"):
            generator.build(in_small, train_mode=in_train_gen)

        # stitch the tiles back together after split the batches
        gen_split = tf.split(0, 4 * 4, generator.output)
        gen_result = tf.concat(1, [
            tf.concat(2, [gen_split[x] for x in xrange(4 * y, 4 * y + 4)])
            for y in xrange(4)
        ])

        discriminator_truth = Discriminator(trainable=True,
                                            input_size=SIZE,
                                            npy_path=DIS_NPY)
        with tf.name_scope('dis_truth'):
            discriminator_truth.build(in_large, train_mode=in_train_dis)

        discriminator_gen = Discriminator(trainable=True, input_size=SIZE)
        with tf.name_scope('dis_gen'):
            discriminator_gen.build(gen_result,
                                    train_mode=in_train_dis,
                                    parent=discriminator_truth)

        vgg_content = custom_vgg19.Vgg19(vgg19_npy_path=VGG_NPY_PATH)
        with tf.name_scope("content_vgg"):
            vgg_content.build(in_large)

        vgg_var = custom_vgg19.Vgg19(var_map=vgg_content.var_map)
        with tf.name_scope("variable_vgg"):
            vgg_var.build(gen_result)

        prob_truth = discriminator_truth.prob
        prob_gen = discriminator_gen.prob

        prob_truth_mean = tf.reduce_mean(prob_truth)
        prob_gen_mean = tf.reduce_mean(prob_gen)

        with tf.name_scope("cost"):
            gen_cost_content = tf.sqrt(
                tf.reduce_mean(tf.square(vgg_var.conv2_2 -
                                         vgg_content.conv2_2)))
            gen_cost_generator = -tf.log(
                tf.clip_by_value(prob_gen_mean, 1e-10, 1.0)) * 2
            gen_cost_invariant = get_invariant_cost2(gen_result)

            # for pre-train (purely with conv22): don't set these 2 cost
            # gen_cost_generator = tf.constant(0.0)  # for pre train
            # gen_cost_invariant = tf.constant(0.0)  # for pre train

            gen_cost = gen_cost_content + gen_cost_generator + gen_cost_invariant

            # clip both log arguments so a saturated discriminator cannot emit NaNs
            dis_cost = tf.reduce_mean(
                -(tf.log(tf.clip_by_value(prob_truth, 1e-10, 1.0)) +
                  tf.log(tf.clip_by_value(1 - prob_gen, 1e-10, 1.0))))
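
            # Loss design: the generator minimizes the non-saturating GAN
            # loss -log D(G(x)) (clipped to dodge log(0)) plus a VGG conv2_2
            # feature-matching term against the ground truth; the
            # discriminator minimizes the usual cross-entropy
            # -[log D(real) + log(1 - D(fake))].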

        with tf.name_scope("train"):
            gen_step = tf.Variable(0, name='gen_step', trainable=False)
            gen_train = tf.train.AdamOptimizer(learning_rate=gen_learning_rate) \
                .minimize(gen_cost, gen_step, var_list=generator.var_list())

            dis_train = tf.train.AdamOptimizer(learning_rate=dis_learning_rate) \
                .minimize(dis_cost, var_list=discriminator_truth.get_all_var())

        print "Net generated: %d" % (time.time() - start_time)
        start_time = time.time()

        # analysis
        for name, var in generator.var_dict.items():
            tf.histogram_summary(name, var)
        for name, var in discriminator_truth.var_dict_name.items():
            tf.histogram_summary(name, var)
        tf.scalar_summary("gen_cost", gen_cost)
        tf.scalar_summary("gen_cost_content", gen_cost_content)
        tf.scalar_summary("gen_cost_generator", gen_cost_generator)
        tf.scalar_summary("gen_cost_invariant", gen_cost_invariant)
        tf.scalar_summary("dis_cost", dis_cost)
        tf.scalar_summary("prob_truth", prob_truth_mean)
        tf.scalar_summary("prob_gen", prob_gen_mean)
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(SUMMARY_FOLDER,
                                                graph=sess.graph)

        saver = tf.train.Saver()

        ckpt = tf.train.get_checkpoint_state(TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print "save restored:" + ckpt.model_checkpoint_path
        else:
            tf.initialize_all_variables().run()
            print "all variables init"

        print "Var init: %d" % (time.time() - start_time)

        start_time = time.time()
        for i in xrange(80000):
            # disable this part for pre-train with conv22
            # train discriminator:
            feed_dict = {
                in_large: get_next_batch(ds),
                in_train_dis: True,
                in_train_gen: False
            }

            _, dis_cost_out, prob_truth_out, prob_gen_out = sess.run(
                [dis_train, dis_cost, prob_truth_mean, prob_gen_mean],
                feed_dict)

            print "dis-step:\t\t\t\t\t " \
                  "dis-cost:%.10f\t\t " \
                  "prob_gen:%.10f\t " \
                  "prob_truth:%.10f" \
                  % (
                      dis_cost_out,
                      prob_gen_out,
                      prob_truth_out
                  )

            if math.isnan(dis_cost_out):
                raise Exception("error found")

            # train generator:
            feed_dict = {
                in_large: get_next_batch(ds),
                in_train_dis: False,
                in_train_gen: True
            }

            (step_out, _, gen_cost_out, cost_content_out, cost_generator_out,
             cost_invariant_out, prob_gen_out) = sess.run(
                [gen_step, gen_train, gen_cost, gen_cost_content,
                 gen_cost_generator, gen_cost_invariant, prob_gen_mean],
                feed_dict)

            duration = time.time() - start_time
            print "step: %d, " \
                  "\t(%.1f sec)\t " \
                  "gen-cost:%.10f\t " \
                  "prob_gen:%.10f,\t " \
                  "gen_cost_content:%.2f,\t " \
                  "gen_cost_generator:%.5f,\t " \
                  "gen_cost_invariant:%.5f" \
                  % (
                      step_out,
                      duration,
                      gen_cost_out,
                      prob_gen_out,
                      cost_content_out,
                      cost_generator_out,
                      cost_invariant_out
                  )

            if math.isnan(gen_cost_out):
                raise Exception("error found")

            if i == 0 or i == 9 or i == 49 or step_out % 100 == 0:
                feed_dict[in_train_dis] = False
                feed_dict[in_train_gen] = False

                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step_out)

                if step_out % 2000 == 0:
                    generator.save_npy(
                        sess, TRAIN_DIR + "/save-gen-%d.npy" % step_out)
                    discriminator_truth.save_npy(
                        sess, TRAIN_DIR + "/save-dis-%d.npy" % step_out)
                else:
                    generator.save_npy(sess, TRAIN_DIR + "/save-gen.npy")
                    discriminator_truth.save_npy(sess,
                                                 TRAIN_DIR + "/save-dis.npy")

                saved_path = saver.save(sess,
                                        TRAIN_DIR + "/saves",
                                        global_step=gen_step,
                                        write_meta_graph=False)
                print "net saved: " + saved_path

                # print image
                gen_out = sess.run(gen_result, feed_dict)
                img_in_path = TRAIN_DIR + "/%d-input.jpg" % step_out
                img_out_path = TRAIN_DIR + "/%d-output.jpg" % step_out
                skimage.io.imsave(img_in_path, feed_dict[in_large][0])
                skimage.io.imsave(img_out_path, gen_out[0])
                print "img saved:", img_in_path, img_out_path