def _save_snapshot(sess, image_out, output_file, step):
    """Evaluate ``image_out`` in ``sess`` and write it to ``output_file % step``."""
    img = sess.run(image_out)
    img_out_path = output_file % step
    skimage.io.imsave(img_out_path, img)
    print("img saved: ", img_out_path)


def render(content_file, style_file,
           content_region_file=None, style_region_file=None,
           random_init=False, load_saved_mapping=True, load_trained_image=False, blur_mapping=True,
           height=None, width=None,
           content_ratio=0., style3_ratio=3., style4_ratio=1., gram_ratio=0.001, diff_ratio=0.,
           epochs=300, output_file="./train/output%d.jpg"):
    """
    Render the synthesis with single generation.
    - Best used if style has high similarity with the content
    - If any ratio is set to 0, the corresponding Tensor will not be generated
    - Pure Gram Matrix synthesis is best for painting abstract style. (gram_ratio = 1 and all others 0)

    :param content_file: String file path of content image
    :param style_file: String file path of style image
    :param content_region_file: String file path of region mapping of content
    :param style_region_file: String file path of region mapping of style. Region mapping is
        only used when both region files are given.
    :param random_init: True to init the image with random values instead of the content image
    :param load_saved_mapping: True to use saved mapping file
    :param load_trained_image: True to use saved training. Currently has no effect because the
        checkpoint restore code below is commented out.
    :param blur_mapping: True to blur the mapping before calculating the max argument
    :param height: int of height of result image
    :param width: int of width of result image. Leaving None with height will scale according
        to the aspect ratio
    :param content_ratio: float32 of weight of content cost
    :param style3_ratio: float32 of weight of patch cost of conv3 layer
    :param style4_ratio: float32 of weight of patch cost of conv4 layer
    :param gram_ratio: float32 of weight of gram matrix cost
    :param diff_ratio: float32 of weight of local difference cost
    :param epochs: int of number of epochs to train
    :param output_file: String file name of output file. %d will be replaced by a running number
    """
    print("render started:")

    # print info:
    frame = inspect.currentframe()
    args, _, _, values = inspect.getargvalues(frame)
    for i in args:
        print(" %s = %s" % (i, values[i]))

    content_np = stylenet_core.load_image(content_file, height, width)
    # the style image is loaded at the content image's height/width
    style_np = stylenet_core.load_image(style_file, content_np.shape[0], content_np.shape[1])
    content_batch = np.expand_dims(content_np, 0)
    style_batch = np.expand_dims(style_np, 0)

    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) as sess:
    tf_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        start_time = time.time()

        contents = tf.constant(content_batch, dtype=tf.float32, shape=content_batch.shape)
        styles = tf.constant(style_batch, dtype=tf.float32, shape=style_batch.shape)

        # var_image is the only image being optimised
        if random_init:
            var_image = tf.Variable(tf.truncated_normal(content_batch.shape, 0.5, 0.1))
        else:
            var_image = tf.Variable(contents)

        vgg_content = custom_vgg19.Vgg19()
        with tf.name_scope("content_vgg"):
            vgg_content.build(contents)

        vgg_style = custom_vgg19.Vgg19()
        with tf.name_scope("style_vgg"):
            vgg_style.build(styles)

        vgg_var = custom_vgg19.Vgg19()
        with tf.name_scope("variable_vgg"):
            vgg_var.build(var_image)

        with tf.name_scope("cost"):
            # NOTE: every "ratio is zero" guard below uses ``==`` rather than ``is``.
            # The defaults are floats (e.g. ``content_ratio=0.``) and ``0.0 is 0`` is
            # False, so an identity test never skipped the disabled sub-graphs.

            # style:
            # TODO change file name based on out file name
            style3file = "./train/%s-style_map_3" % (
                get_filename(content_file) + "-" + get_filename(style_file))
            style4file = "./train/%s-style_map_4" % (
                get_filename(content_file) + "-" + get_filename(style_file))

            if content_region_file is None or style_region_file is None:
                # no region mapping: patch costs are computed without region tensors
                if style3_ratio == 0:
                    style_cost_3 = tf.constant(0.0)
                else:
                    style_cost_3 = stylenet_core.get_style_cost_patch2(sess, vgg_var.conv3_1,
                                                                       vgg_content.conv3_1,
                                                                       vgg_style.conv3_1,
                                                                       style3file,
                                                                       load_saved_mapping=load_saved_mapping)
                if style4_ratio == 0:
                    style_cost_4 = tf.constant(0.0)
                else:
                    style_cost_4 = stylenet_core.get_style_cost_patch2(sess, vgg_var.conv4_1,
                                                                       vgg_content.conv4_1,
                                                                       vgg_style.conv4_1,
                                                                       style4file,
                                                                       load_saved_mapping=load_saved_mapping)
            else:
                content_regions_np = stylenet_core.load_image(content_region_file, content_np.shape[0],
                                                              content_np.shape[1])
                style_regions_np = stylenet_core.load_image(style_region_file, content_np.shape[0],
                                                            content_np.shape[1])
                content_regions_batch = np.expand_dims(content_regions_np, 0)
                style_regions_batch = np.expand_dims(style_regions_np, 0)
                content_regions = tf.constant(content_regions_batch, dtype=tf.float32,
                                              shape=content_regions_batch.shape)
                style_regions = tf.constant(style_regions_batch, dtype=tf.float32,
                                            shape=style_regions_batch.shape)

                # two pooling passes on the region maps before the conv3_1 patch cost
                content_regions = vgg_var.avg_pool(content_regions, None)
                content_regions = vgg_var.avg_pool(content_regions, None)
                style_regions = vgg_var.avg_pool(style_regions, None)
                style_regions = vgg_var.avg_pool(style_regions, None)

                if style3_ratio == 0:
                    style_cost_3 = tf.constant(0.0)
                else:
                    style_cost_3 = stylenet_core.get_style_cost_patch2(sess,
                                                                       vgg_var.conv3_1,
                                                                       vgg_content.conv3_1,
                                                                       vgg_style.conv3_1,
                                                                       style3file,
                                                                       content_regions,
                                                                       style_regions,
                                                                       load_saved_mapping,
                                                                       blur_mapping=blur_mapping)

                # one more pooling pass before the conv4_1 patch cost
                content_regions = vgg_var.avg_pool(content_regions, None)
                style_regions = vgg_var.avg_pool(style_regions, None)

                if style4_ratio == 0:
                    style_cost_4 = tf.constant(0.0)
                else:
                    style_cost_4 = stylenet_core.get_style_cost_patch2(sess,
                                                                       vgg_var.conv4_1,
                                                                       vgg_content.conv4_1,
                                                                       vgg_style.conv4_1,
                                                                       style4file,
                                                                       content_regions,
                                                                       style_regions,
                                                                       load_saved_mapping,
                                                                       blur_mapping=blur_mapping)

            if gram_ratio == 0:
                style_cost_gram = tf.constant(0.0)
            else:
                style_cost_gram = stylenet_core.get_style_cost_gram(sess, vgg_style, vgg_var)

            # content:
            if content_ratio == 0:
                content_cost = tf.constant(.0)
            else:
                fixed_content = stylenet_core.get_constant(sess, vgg_content.conv4_2)
                content_cost = stylenet_core.l2_norm_cost(vgg_var.conv4_2 - fixed_content)

            # smoothness:
            if diff_ratio == 0:
                diff_cost = tf.constant(.0)
            else:
                # 3x3 kernels: centre pixel (-1) against its right neighbour (h) and
                # the pixel below (v), replicated over the 3 input channels
                diff_filter_h = tf.constant([0, 0, 0, 0, -1, 1, 0, 0, 0], tf.float32, [3, 3, 1, 1])
                diff_filter_h = tf.concat([diff_filter_h, diff_filter_h, diff_filter_h], 2)
                diff_filter_v = tf.constant([0, 0, 0, 0, -1, 0, 0, 1, 0], tf.float32, [3, 3, 1, 1])
                diff_filter_v = tf.concat([diff_filter_v, diff_filter_v, diff_filter_v], 2)
                diff_filter = tf.concat([diff_filter_h, diff_filter_v], 3)
                filtered_input = tf.nn.conv2d(var_image, diff_filter, [1, 1, 1, 1], "VALID")
                diff_cost = stylenet_core.l2_norm_cost(filtered_input) * 1e7

            content_cost = content_cost * content_ratio
            style_cost_3 = style_cost_3 * style3_ratio
            style_cost_4 = style_cost_4 * style4_ratio
            style_cost_gram = style_cost_gram * gram_ratio
            diff_cost = diff_cost * diff_ratio
            cost = content_cost + style_cost_3 + style_cost_4 + style_cost_gram + diff_cost

        with tf.name_scope("train"):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=0.02)
            gvs = optimizer.compute_gradients(cost)
            training = optimizer.apply_gradients(gvs, global_step=global_step)

        print("Net generated:", (time.time() - start_time))
        start_time = time.time()

        with tf.name_scope("image_out"):
            # drop the batch dimension and clamp to [0, 1] for saving
            image_out = tf.clip_by_value(tf.squeeze(var_image, [0]), 0, 1)

        # saver = tf.train.Saver(max_to_keep=1)
        # checkpoint = tf.train.get_checkpoint_state("./train")
        # if checkpoint and checkpoint.model_checkpoint_path and load_trained_image:
        #     saver.restore(sess, checkpoint.model_checkpoint_path)
        #     print("save restored:", checkpoint.model_checkpoint_path)
        # else:
        sess.run(tf.global_variables_initializer())
        print("all variables init")

        print("Var init: %d" % (time.time() - start_time))

        step_out = 0
        start_time = time.time()
        for i in range(epochs):
            # snapshot every 5th iteration, named after the last observed global step
            if i % 5 == 0:
                _save_snapshot(sess, image_out, output_file, step_out)

            (step_out, content_out, style_patch3_out, style_patch4_out,
             style_gram_out, diff_cost_out, cost_out, _) = sess.run(
                [global_step, content_cost, style_cost_3, style_cost_4,
                 style_cost_gram, diff_cost, cost, training])

            duration = time.time() - start_time
            print("Step %d: cost:%.10f\t(%.1f sec)" % (step_out, cost_out, duration),
                  "\t content:%.5f, style_3:%.5f, style_4:%.5f, gram:%.5f, diff_cost_out:%.5f"
                  % (content_out, style_patch3_out, style_patch4_out, style_gram_out, diff_cost_out))

            # if (i + 1) % 10 == 0:
            #     saved_path = saver.save(sess, "./train/saves-" + get_filename(content_file),
            #                             global_step=global_step)
            #     print("net saved: ", saved_path)

        # final result
        _save_snapshot(sess, image_out, output_file, step_out)
img = skimage.io.imread("tex1-b.png", as_grey=True) / 255.0 # img = np.array([[0.4,0.6,0.1,0.2],[0.1,0.5,0.5,0],[1,1,0.4,0.3],[0.2,0.2,0.5,0.8]]) timg = tf.convert_to_tensor(img, dtype=tf.float32) # timg = tf.expand_dims(timg, 2) # print(timg.shape) gen_sm = twopoint_correlation_layer(noise) sm = twopoint_correlation_layer(timg[:height, :width]) tpc_loss = l2_loss(gen_sm - sm) # compute style loss texture_img, texture_shape = load_image("tex3.png") # h,w,c texture_model = vgg19.Vgg19() texture_model.build(texture_img, texture_shape[1:]) rgbnoise = tf.expand_dims(noise, 2) rgbnoise = tf.image.grayscale_to_rgb(rgbnoise) rgbnoise = tf.expand_dims(rgbnoise, 0) # print(rgbnoise.shape) x_model = vgg19.Vgg19() x_model.build(rgbnoise, rgbnoise.shape.as_list()[1:]) style_loss = get_texture_loss(x_model, texture_model) total_loss = Alpha * tpc_loss + Beta * style_loss optimizer = tf.train.AdamOptimizer(0.02).minimize(total_loss, var_list=noise)
help="path to where the styled image will be created")
    # NOTE(review): the line above is the tail of a call that starts outside this
    # view; the indentation of this argument-parsing section is reconstructed.
    args = parser.parse_args()

    # Assign image paths from the arg parsing
    INPUT_PATH = os.path.realpath(args.input)
    STYLE_PATH = os.path.realpath(args.style)
    OUT_PATH = os.path.realpath(args.out)

with tf.Session() as sess:
    parse_args()  #about path of images and parse paras
    # Load the content image and prepend a batch dimension of 1 to its shape.
    photo, image_shape = utils.load_image(INPUT_PATH)  #load image
    image_shape = [1] + image_shape
    photo = photo.reshape(image_shape).astype(np.float32)

    # Load the style image at the content image's height/width and give it the same shape.
    art = utils.load_image2(STYLE_PATH, height=image_shape[1], width=image_shape[2])
    art = art.reshape(image_shape).astype(np.float32)

    # Initialize the variable image that will become our final output as random noise
    noise = tf.Variable(tf.truncated_normal(image_shape, mean=.5, stddev=.1))

    # VGG Networks Init
    with tf.name_scope('vgg_content'):
        content_model = vgg19.Vgg19()
        content_model.build(photo, image_shape[1:])

    # NOTE(review): the result of this placeholder is discarded, and `np.float` is
    # an alias that recent NumPy releases no longer provide — confirm whether this
    # statement is needed at all. Its nesting level is reconstructed.
    tf.placeholder(shape=None, dtype=np.float)
# Script fragment: loads the content and style images, creates the trainable
# output image and builds one VGG19 network for each of the three images.
# `utils`, `vgg19`, `tf`, `np` and `CONTENT_WEIGHT` are defined outside this fragment.

# Initialize and process photo image to be used for our content
photo, image_shape = utils.load_image('../lib/images/content/2.jpg')
image_shape = [1] + image_shape  # prepend a batch dimension of 1
photo = photo.reshape(image_shape).astype(np.float32)

# Initialize and process art image to be used for our style
# (loaded at the content image's height/width so both share `image_shape`)
art = utils.load_image2('../lib/images/style/starry-night.jpg',
                        height=image_shape[1], width=image_shape[2])
art = art.reshape(image_shape).astype(np.float32)

# Initialize the variable image that will become our final output as random noise
noise = tf.Variable(tf.truncated_normal(image_shape, mean=.5, stddev=.1))

# VGG Networks Init
with tf.name_scope('vgg_content'):
    content_model = vgg19.Vgg19()
    content_model.build(photo, image_shape[1:])

with tf.name_scope('vgg_style'):
    style_model = vgg19.Vgg19()
    style_model.build(art, image_shape[1:])

with tf.name_scope('vgg_x'):
    x_model = vgg19.Vgg19()
    x_model.build(noise, image_shape[1:])

# Loss functions
with tf.name_scope('loss'):
    # Content
    # Compare by value: `is 0` is an identity test that is False for 0.0 and
    # triggers a SyntaxWarning on Python >= 3.8.
    if CONTENT_WEIGHT == 0:
        content_loss = tf.constant(0.)
def train(ds, dis_learning_rate, gen_learning_rate):
    """
    Build the generator / discriminator graph and run the alternating training loop.

    Each of the 80000 iterations runs one discriminator update followed by one
    generator update, each on a fresh batch from ``get_next_batch(ds)``.
    Summaries, ``.npy`` weight dumps, a checkpoint and an input/output image pair
    are written periodically under ``SUMMARY_FOLDER`` / ``TRAIN_DIR``.

    Written for Python 2 (``print`` statements, ``xrange``) and the pre-1.0
    TensorFlow API (``tf.concat(axis, values)``, ``tf.split(axis, num, value)``,
    ``tf.histogram_summary`` ...).

    NOTE(review): the nesting of the statements that follow the periodic ``if`` in
    the loop was reconstructed from whitespace-collapsed source — confirm.

    :param ds: data source handed to ``get_next_batch``
    :param dis_learning_rate: Adam learning rate for the discriminator update
    :param gen_learning_rate: Adam learning rate for the generator update
    :raises Exception: when the discriminator or generator cost becomes NaN
    """
    with tf.Session() as sess:
        start_time = time.time()

        # train-mode switches for the two networks plus the image inputs
        in_train_gen = tf.placeholder(tf.bool)
        in_train_dis = tf.placeholder(tf.bool)
        in_large = tf.placeholder(tf.float32, [1, SIZE, SIZE, 3])
        # NOTE(review): no feed_dict in this function supplies `in_small`; the code
        # that derived it from `in_large` sits in the disabled string block below —
        # confirm how this placeholder is meant to be fed.
        in_small = tf.placeholder(tf.float32, [1, SIZE, SIZE, 3])

        # The string literal below is disabled code kept as a no-op expression.
        """
        # extra difficulty: blur the large image:
        blur_filter = tf.constant(1, shape=[5, 5, 1, 1], dtype=tf.float32) / 25
        blur_filter = tf.tile(blur_filter, [1, 1, 3, 1])
        in_large_blur = tf.nn.depthwise_conv2d(in_large, blur_filter, strides=[1, 1, 1, 1], padding='SAME')

        # reduce the size to smaller
        in_small = tf.nn.avg_pool(in_large_blur, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

        # use stitch training method, slice the image into tiles and concat as batches
        t = create_tiles(in_small, SIZE / 4, SIZE / 4, 4)
        in_stitch = tf.concat(0, [tf.concat(0, t[y]) for y in xrange(4)])  # row1, row2, ...
        """

        generator = TensorZoomNet(trainable=True, npy_path=GEN_NPY)
        with tf.name_scope("generator"):
            generator.build(in_small, train_mode=in_train_gen)

        # stitch the tiles back together after split the batches
        # (16 slices along axis 0, re-assembled as a 4 x 4 grid: axis 2 within a row,
        # axis 1 across rows)
        # NOTE(review): this expects 16 entries along axis 0 of generator.output,
        # while `in_small` is declared with a leading dimension of 1 — confirm.
        gen_split = tf.split(0, 4 * 4, generator.output)
        gen_result = tf.concat(1, [
            tf.concat(2, [gen_split[x] for x in xrange(4 * y, 4 * y + 4)])
            for y in xrange(4)
        ])

        # Two discriminator instances: one on the real image, one on the generated
        # image; the second is built with parent=discriminator_truth
        # (presumably to share its weights — confirm in Discriminator.build).
        discriminator_truth = Discriminator(trainable=True, input_size=SIZE, npy_path=DIS_NPY)
        with tf.name_scope('dis_truth'):
            discriminator_truth.build(in_large, train_mode=in_train_dis)

        discriminator_gen = Discriminator(trainable=True, input_size=SIZE)
        with tf.name_scope('dis_gen'):
            discriminator_gen.build(gen_result, train_mode=in_train_dis, parent=discriminator_truth)

        # Two VGG19 instances for the feature-space content cost; the second is
        # constructed from the first one's var_map.
        vgg_content = custom_vgg19.Vgg19(vgg19_npy_path=VGG_NPY_PATH)
        with tf.name_scope("content_vgg"):
            vgg_content.build(in_large)

        vgg_var = custom_vgg19.Vgg19(var_map=vgg_content.var_map)
        with tf.name_scope("variable_vgg"):
            vgg_var.build(gen_result)

        prob_truth = discriminator_truth.prob
        prob_gen = discriminator_gen.prob
        prob_truth_mean = tf.reduce_mean(prob_truth)
        prob_gen_mean = tf.reduce_mean(prob_gen)

        with tf.name_scope("cost"):
            # RMS difference between the conv2_2 activations of generated and real image
            gen_cost_content = tf.sqrt(
                tf.reduce_mean(tf.square(vgg_var.conv2_2 - vgg_content.conv2_2)))
            # -2 * log(mean discriminator probability on the generated image);
            # the probability is clipped to keep the log finite
            gen_cost_generator = -tf.log(
                tf.clip_by_value(prob_gen_mean, 1e-10, 1.0)) * 2
            gen_cost_invariant = get_invariant_cost2(gen_result)

            # for pre-train (purely with conv22): don't set these 2 cost
            # gen_cost_generator = tf.constant(0.0)  # for pre train
            # gen_cost_invariant = tf.constant(0.0)  # for pre train

            gen_cost = gen_cost_content + gen_cost_generator + gen_cost_invariant
            # NOTE(review): only the (1 - prob_gen) term is clipped before the log;
            # tf.log(prob_truth) is not — confirm this is intended.
            dis_cost = tf.reduce_mean(
                -(tf.log(prob_truth) + tf.log(tf.clip_by_value(1 - prob_gen, 1e-10, 1.0))))

        with tf.name_scope("train"):
            # gen_step is incremented by the generator update only
            gen_step = tf.Variable(0, name='gen_step', trainable=False)
            gen_train = tf.train.AdamOptimizer(learning_rate=gen_learning_rate) \
                .minimize(gen_cost, gen_step, var_list=generator.var_list())
            dis_train = tf.train.AdamOptimizer(learning_rate=dis_learning_rate) \
                .minimize(dis_cost, var_list=discriminator_truth.get_all_var())

        print "Net generated: %d" % (time.time() - start_time)
        start_time = time.time()

        # analysis
        for name, var in generator.var_dict.items():
            tf.histogram_summary(name, var)
        for name, var in discriminator_truth.var_dict_name.items():
            tf.histogram_summary(name, var)

        tf.scalar_summary("gen_cost", gen_cost)
        tf.scalar_summary("gen_cost_content", gen_cost_content)
        tf.scalar_summary("gen_cost_generator", gen_cost_generator)
        tf.scalar_summary("gen_cost_invariant", gen_cost_invariant)
        tf.scalar_summary("dis_cost", dis_cost)
        tf.scalar_summary("prob_truth", prob_truth_mean)
        tf.scalar_summary("prob_gen", prob_gen_mean)

        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(SUMMARY_FOLDER, graph=sess.graph)

        # Resume from the latest checkpoint in TRAIN_DIR when one exists,
        # otherwise initialise all variables.
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print "save restored:" + ckpt.model_checkpoint_path
        else:
            tf.initialize_all_variables().run()
            print "all variables init"

        print "Var init: %d" % (time.time() - start_time)

        start_time = time.time()
        for i in xrange(80000):
            # disable this part for pre-train with conv22
            # train discriminator:
            feed_dict = {
                in_large: get_next_batch(ds),
                in_train_dis: True,
                in_train_gen: False
            }
            _, \
            dis_cost_out, \
            prob_truth_out, \
            prob_gen_out \
                = sess.run([
                    dis_train,
                    dis_cost,
                    prob_truth_mean,
                    prob_gen_mean
                ], feed_dict)
            print "dis-step:\t\t\t\t\t " \
                  "dis-cost:%.10f\t\t " \
                  "prob_gen:%.10f\t " \
                  "prob_truth:%.10f" \
                  % (
                      dis_cost_out,
                      prob_gen_out,
                      prob_truth_out
                  )
            # abort as soon as the discriminator cost is NaN
            if math.isnan(dis_cost_out):
                raise Exception("error found")

            # train generator:
            feed_dict = {
                in_large: get_next_batch(ds),
                in_train_dis: False,
                in_train_gen: True
            }
            step_out, \
            _, \
            gen_cost_out, \
            cost_content_out, \
            cost_generator_out, \
            cost_invariant_out, \
            prob_gen_out \
                = sess.run([
                    gen_step,
                    gen_train,
                    gen_cost,
                    gen_cost_content,
                    gen_cost_generator,
                    gen_cost_invariant,
                    prob_gen_mean
                ], feed_dict)
            duration = time.time() - start_time
            print "step: %d, " \
                  "\t(%.1f sec)\t " \
                  "gen-cost:%.10f\t " \
                  "prob_gen:%.10f,\t " \
                  "gen_cost_content:%.2f,\t " \
                  "gen_cost_generator:%.5f,\t " \
                  "gen_cost_invariant:%.5f" \
                  % (
                      step_out,
                      duration,
                      gen_cost_out,
                      prob_gen_out,
                      cost_content_out,
                      cost_generator_out,
                      cost_invariant_out
                  )
            # abort as soon as the generator cost is NaN
            if math.isnan(gen_cost_out):
                raise Exception("error found")

            # Periodic reporting: iterations 0, 9 and 49, then whenever the generator
            # step is a multiple of 100.
            if i == 0 or i == 9 or i == 49 or step_out % 100 == 0:
                # evaluate the summaries with both train-mode switches off
                feed_dict[in_train_dis] = False
                feed_dict[in_train_gen] = False
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step_out)

                # every 2000 steps keep a step-numbered weight dump, otherwise
                # overwrite the rolling "latest" dump
                if step_out % 2000 == 0:
                    generator.save_npy(
                        sess, TRAIN_DIR + "/save-gen-%d.npy" % step_out)
                    discriminator_truth.save_npy(
                        sess, TRAIN_DIR + "/save-dis-%d.npy" % step_out)
                else:
                    generator.save_npy(sess, TRAIN_DIR + "/save-gen.npy")
                    discriminator_truth.save_npy(sess, TRAIN_DIR + "/save-dis.npy")

                saved_path = saver.save(sess, TRAIN_DIR + "/saves", global_step=gen_step,
                                        write_meta_graph=False)
                print "net saved: " + saved_path

                # print image
                gen_out = sess.run(gen_result, feed_dict)
                img_in_path = TRAIN_DIR + "/%d-input.jpg" % step_out
                img_out_path = TRAIN_DIR + "/%d-output.jpg" % step_out
                skimage.io.imsave(img_in_path, feed_dict[in_large][0])
                skimage.io.imsave(img_out_path, gen_out[0])
                print "img saved:", img_in_path, img_out_path