def __init__(self, images, texture, batch_size=1, content=None):
    """Build a VGG-19 over ``images`` and, optionally, a second one over ``content``.

    Args:
        images: input handed to ``vgg.Vgg19().build``; kept on ``self.images``.
        texture: stored unchanged on ``self.texture``.
        batch_size: stored unchanged on ``self.batch_size``.
        content: optional second input. When given, a separate VGG-19 is
            created and built under the ``content_model`` name scope and
            kept on ``self.content_model``.
    """
    self.batch_size = batch_size
    self.model = vgg.Vgg19()
    self.texture = texture
    self.images = images
    self.model.build(self.images)

    # Identity test on purpose: ``content != None`` can be evaluated
    # element-wise by array-like inputs, which makes the ``if`` ambiguous.
    if content is not None:
        with tf.name_scope('content_model'):
            self.content_model = vgg.Vgg19()
            self.content_model.build(content)
def main(content_path, style_path, output_dir, iterations, vgg_path,
         preserve_color):
    """Optimise a noise image against content/style targets and save each step.

    Args:
        content_path: path passed to ``load_image`` for the content image.
        style_path: path passed to ``load_image`` for the style image.
        output_dir: directory that receives ``output_NNNN.jpg`` files.
        iterations: number of optimiser steps (one image saved per step).
        vgg_path: argument forwarded to every ``vgg19.Vgg19`` constructor.
        preserve_color: when truthy, the content image's YUV data is passed
            to ``save_image``; otherwise ``None`` is passed.
    """
    content_img, content_yuv = load_image(content_path)  # ndarray
    print('content_img.shape {}'.format(content_img.shape))
    style_img, _ = load_image(style_path)

    with tf.Session() as sess:
        content_vgg = vgg19.Vgg19(vgg_path)
        content = tf.placeholder("float", content_img.shape)
        content_vgg.build(content)

        style_vgg = vgg19.Vgg19(vgg_path)
        style = tf.placeholder("float", style_img.shape)
        style_vgg.build(style)

        sess.run(tf.global_variables_initializer())

        # Note: the two representations below only need to be computed once.
        # The content representation is the activation of CONTENT_LAYER.
        content_rep = sess.run(getattr(content_vgg, CONTENT_LAYER),
                               feed_dict={content: content_img})
        # Note: the style representation is the Gram matrix.
        style_rep = sess.run(get_style_rep(style_vgg),
                             feed_dict={style: style_img})

    # Start with white noise.
    noise = tf.truncated_normal(content_img.shape,
                                stddev=0.1 * np.std(content_img))
    image = tf.Variable(noise)
    image_vgg = vgg19.Vgg19(vgg_path)
    image_vgg.build(image)

    # Define loss and optimizer.
    content_loss = tf.nn.l2_loss(
        getattr(image_vgg, CONTENT_LAYER) - content_rep) / content_rep.size
    print('content.shape: {}'.format(content.shape))
    style_loss = compute_style_loss(style_rep, image_vgg)
    total_loss = ALPHA * content_loss + BETA * style_loss
    optimizer = tf.train.AdamOptimizer(LR)
    optimize = optimizer.minimize(total_loss)

    # Style transfer.
    fmt_str = 'Iteration {:4}/{:4} content loss {:14} style loss {:14}'
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(1, iterations + 1):
            sess.run(optimize)
            # Single-argument print() behaves the same on Python 2 and 3.
            print(fmt_str.format(i, iterations,
                                 ALPHA * content_loss.eval(),
                                 BETA * style_loss.eval()))
            output_path = os.path.join(output_dir,
                                       'output_{:04}.jpg'.format(i))
            save_image(image.eval(), output_path,
                       content_yuv if preserve_color else None)
def main(content_path, style_path, output_dir, iterations, vgg_path,
         preserve_color):
    """Run iterative style transfer, writing one output image per iteration.

    The content and style targets are evaluated once in a first session;
    a noise-initialised variable is then optimised with Adam in a second
    session against ``ALPHA * content_loss + BETA * style_loss``.
    """
    # Inputs (the original comment says these are mean-subtracted).
    content_img, content_yuv = load_image(content_path)
    style_img, _ = load_image(style_path)

    # Target representations, computed a single time.
    with tf.Session() as sess:
        content_vgg = vgg19.Vgg19(vgg_path)
        content = tf.placeholder("float", content_img.shape)
        content_vgg.build(content)

        style_vgg = vgg19.Vgg19(vgg_path)
        style = tf.placeholder("float", style_img.shape)
        style_vgg.build(style)

        sess.run(tf.global_variables_initializer())
        content_target = getattr(content_vgg, CONTENT_LAYER)
        content_rep = sess.run(content_target,
                               feed_dict={content: content_img})
        # NOTE(review): the style target is also read from CONTENT_LAYER;
        # confirm this is what compute_style_loss expects.
        style_target = getattr(style_vgg, CONTENT_LAYER)
        style_rep = sess.run(style_target, feed_dict={style: style_img})

    # The optimised image starts as white noise.
    noise_stddev = 0.1 * np.std(content_img)
    image = tf.Variable(
        tf.truncated_normal(content_img.shape, stddev=noise_stddev))
    image_vgg = vgg19.Vgg19(vgg_path)
    image_vgg.build(image)

    # Losses and the training op.
    feature_gap = getattr(image_vgg, CONTENT_LAYER) - content_rep
    content_loss = tf.nn.l2_loss(feature_gap) / content_rep.size
    style_loss = compute_style_loss(style_rep, image_vgg)
    loss = ALPHA * content_loss + BETA * style_loss
    optimizer = tf.train.AdamOptimizer(LR).minimize(loss)

    progress = 'Iteration {:4}/{:4} content loss {:14} style loss {:14}'
    color_reference = content_yuv if preserve_color else None

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(1, iterations + 1):
            sess.run(optimizer)
            weighted_content = ALPHA * content_loss.eval()
            weighted_style = BETA * style_loss.eval()
            print(progress.format(step, iterations, weighted_content,
                                  weighted_style))

            # Save the current image (the original comment says the mean
            # subtraction is undone here).
            output_path = os.path.join(output_dir,
                                       'output_{:04}.jpg'.format(step))
            save_image(image.eval(), output_path, color_reference)
def preprocess(self):
    """Calculate content feature maps and style Gram matrices.

    Fills ``self.content_features`` (layer name -> activations for
    ``self.content_image``) and ``self.style_grams`` (layer name -> Gram
    matrix for ``self.style_image``, divided by the feature map's size).
    """
    self.content_features = {}
    self.style_grams = {}

    with tf.Graph().as_default():
        net = vgg19.Vgg19(self.opts.vgg19_npy_path)
        image_ph = tf.placeholder(tf.float32, shape=(1, None, None, 3))
        net.build(image_ph)

        with tf.Session() as sess:
            for name in self.opts.content_layers:
                activations = sess.run(
                    net.end_points[name],
                    feed_dict={image_ph: self.content_image})
                self.content_features[name] = activations
                print("layer {} feature map shape: {}".format(
                    name, activations.shape))

            for name in self.opts.style_layers:
                activations = sess.run(
                    net.end_points[name],
                    feed_dict={image_ph: self.style_image})
                # Flatten to (positions, channels) before the outer product.
                flat = np.reshape(activations, (-1, activations.shape[3]))
                gram = np.matmul(flat.T, flat)
                gram /= flat.size
                self.style_grams[name] = gram
                print("layer {} gram matrix shape: {}".format(
                    name, gram.shape))
def build_graph(vgg19_model_path):
    """Create a VGG-19 fed by a ``[1, 224, 224, 3]`` float placeholder.

    Returns:
        ``(vgg, placeholder)``: the built network and its input placeholder.
    """
    image_input = tf.placeholder("float", [1, 224, 224, 3])
    network = vgg19.Vgg19(vgg19_model_path)
    with tf.name_scope("content_vgg"):
        network.build(image_input)
    return network, image_input
def test(self, test_image_path, model_path, maxlen):
    """Generate, print and return a caption for one image.

    Args:
        test_image_path: path handed to ``read_image``.
        model_path: checkpoint restored into ``self.sess``.
        maxlen: accepted but not used by this method.

    Returns:
        The generated sentence; every word is preceded by one space, so a
        non-empty result starts with a space.
    """
    ixtoword = self.dataset['ix_to_word']

    images = tf.placeholder("float32", [1, 224, 224, 3])
    image_val = read_image(test_image_path)

    vgg = vgg19.Vgg19()
    with tf.name_scope("content_vgg"):
        vgg.build(images)
    fc7 = self.sess.run(vgg.relu7, feed_dict={images: image_val})

    saver = tf.train.Saver()
    saver.restore(self.sess, model_path)

    generated_word_index = self.sess.run(self.generated_words,
                                         feed_dict={self.image: fc7})
    generated_word_index = np.hstack(generated_word_index)

    # Index 0 terminates the sentence.
    words = []
    for x in generated_word_index:
        if x == 0:
            break
        words.append(ixtoword[str(x)])
    # Join once instead of re-allocating the string per word.
    generated_sentence = ''.join(' ' + word for word in words)

    # Single-argument print() behaves the same on Python 2 and 3.
    print(' ')
    print('--------------------------------------------------------------------------------------------------------')
    print(generated_sentence)
    return generated_sentence
def vgg19_pretrained(image):
    """Return the ``conv3_1`` and ``conv5_1`` VGG-19 activations of one image.

    Args:
        image: array whose ``shape`` is exactly ``(160, 160, 3)``.

    Returns:
        ``(conv3_1, conv5_1)`` with the leading batch axis removed.

    Raises:
        AssertionError: if ``image.shape`` is not ``(160, 160, 3)``.

    Example (from the original file)::

        img = np.random.random([160, 160, 3])
        a, b = vgg19_pretrained(img)
        print(a.shape, b.shape)
    """
    assert image.shape == (160, 160, 3)
    batch = np.array(image[np.newaxis, :, :, :])

    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    with tf.Session(config=session_config) as sess:
        images = tf.placeholder("float", [1, 160, 160, 3])
        vgg = vgg19.Vgg19()
        with tf.name_scope("content_vgg"):
            vgg.build(images)

        # Fetch both layers in one run so the network is evaluated once
        # instead of once per layer.
        conv3_1, conv5_1 = sess.run([vgg.conv3_1, vgg.conv5_1],
                                    feed_dict={images: batch})
        return conv3_1[0, :, :, :], conv5_1[0, :, :, :]
def run(args):
    """Plot VGG-19 feature maps for the image at ``args.img_path``.

    With ``args.subplot`` set, ``LAYERS`` is treated as a list of groups of
    layer names and every layer is drawn into a 5x9 subplot grid using
    channel ``args.feature_map``. Otherwise ``LAYERS`` is treated as a flat
    list and each layer gets its own figure showing channel 0.
    """
    data = parse_img(args.img_path)
    net = vgg19.Vgg19()
    net.build(data)
    tensors = vars(net)  # layer tensors are looked up by attribute name

    with tf.Session() as sess:
        if not args.subplot:
            for figure_no, name in enumerate(LAYERS, 1):
                features = sess.run(tensors[name])
                print("layer '{}', shape '{}'".format(name,
                                                      str(features.shape)))
                plt.figure(figure_no)
                plt.matshow(features[0, :, :, 0], cmap=plt.cm.gray,
                            fignum=figure_no)
                plt.title(name)
                plt.colorbar()
            plt.show()
        else:
            hspace = 0.6
            plt.subplots_adjust(hspace=hspace)
            for row, group in enumerate(LAYERS):
                for col, name in enumerate(group):
                    features = sess.run(tensors[name])
                    print("layer '{}', shape '{}'".format(
                        name, str(features.shape)))
                    plt.subplot(5, 9, (row * 9 + col) + 1)
                    # Other colour maps: hot, spring, cool, bone
                    # (plt.matshow is an alternative to imshow).
                    channel = features[0, :, :, args.feature_map]
                    im = plt.imshow(channel, cmap=plt.cm.gray)
                    plt.title(name, y=1.0)
                    plt.colorbar(im)
            plt.show()
def main():
    """Run every image found under ``--path`` through VGG-19 in batches.

    Images are processed ``per`` at a time (at most 10). A trailing batch
    whose length equals ``num % per`` gets its own network and placeholder
    sized to that batch.
    """
    bakpath = "bak"
    parser = argparse.ArgumentParser()
    parser.add_argument("--path",
                        help="读取某个文件夹下的所有图像文件,default=" + TEST_DATA,
                        default=TEST_DATA)
    parser.add_argument("--npy", default='./vgg19.npy')
    args = parser.parse_args()

    basepath = args.path if args.path else TEST_DATA
    mkdir(bakpath + "/" + basepath)

    imgdata = loadFrom(basepath)
    num = len(imgdata)
    if num == 0:
        utils.printcolor("图像文件数量为0", mode='bold', fore='red')
        return

    per = num if num <= 10 else 10
    remainder = num % per
    count = num // per + (1 if remainder else 0)
    print(per, num, count, num % per)

    vgg = vgg19.Vgg19(args.npy)
    images = tf.placeholder("float", [per, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        vgg.build(images)

    for batch_no in range(0, count):
        start = batch_no * per
        xdata = imgdata[start:start + per]
        if len(xdata) != remainder:
            tensor_imgdata(xdata, images, vgg, bakpath)
            continue
        # Short batch: build a network whose input matches its length.
        vggx = vgg19.Vgg19(args.npy)
        images = tf.placeholder("float", [len(xdata), 224, 224, 3])
        with tf.name_scope("content_vgg"):
            vggx.build(images)
        tensor_imgdata(xdata, images, vggx, bakpath)
def extract_feature(self, batch, batch_size):
    """Return the VGG-19 ``fc6`` output for ``batch``.

    A fresh graph and session are created on every call; the input
    placeholder has shape ``[batch_size, 224, 224, 3]``.
    """
    # NOTE(review): the device scope is entered before the new graph becomes
    # the default one — confirm the '/gpu:0' pin applies to the ops below.
    with tf.device('/gpu:0'), tf.Graph().as_default(), \
            tf.Session() as sess:
        image_input = tf.placeholder("float", [batch_size, 224, 224, 3])
        network = vgg19.Vgg19()
        with tf.name_scope("content_vgg"):
            network.build(image_input)
        return sess.run(network.fc6, feed_dict={image_input: batch})
def build_graph(self):
    """Build the output-image variable, the three losses and their summaries.

    Sets ``self.content_loss``, ``self.style_loss``,
    ``self.total_variation_loss``, ``self.loss`` and ``self.output_image``.
    Reads ``self.content_features`` and ``self.style_grams``, which must
    already be populated.

    Raises:
        ValueError: if a layer-weight option has more than one entry but not
            exactly one per layer.
    """

    def expand_weights(raw_weights, layers, kind):
        # A real list is required: on Python 3 ``map`` returns an iterator,
        # which supports neither ``len()`` nor repetition.
        weights = [float(w) for w in raw_weights]
        if len(weights) == 1:
            return weights * len(layers)
        if len(weights) != len(layers):
            raise ValueError(
                "{0}_layer_loss_weights not match {0}_layers.".format(kind))
        return weights

    vgg = vgg19.Vgg19(self.opts.vgg19_npy_path)
    output_image = tf.get_variable("output_image",
                                   initializer=self.init_image)
    vgg.build(output_image)
    tf.summary.image(
        'output', tf.cast(tf.clip_by_value(output_image, 0, 255), tf.uint8))

    # Content loss.
    self.content_loss = 0.0
    layer_weights = expand_weights(self.opts.content_layer_loss_weights,
                                   self.opts.content_layers, "content")
    for layer, weight in zip(self.opts.content_layers, layer_weights):
        self.content_loss += weight * tf.losses.mean_squared_error(
            self.content_features[layer], vgg.end_points[layer]) / 2.0
    tf.summary.scalar('content_loss', self.content_loss)

    # Style loss.
    self.style_loss = 0.0
    layer_weights = expand_weights(self.opts.style_layer_loss_weights,
                                   self.opts.style_layers, "style")
    for layer, weight in zip(self.opts.style_layers, layer_weights):
        feature_map = vgg.end_points[layer]
        feature_map = tf.reshape(feature_map, (-1, feature_map.shape[3]))
        gram = tf.matmul(tf.transpose(feature_map), feature_map)
        gram /= tf.cast(tf.size(feature_map), tf.float32)
        self.style_loss += weight * tf.losses.mean_squared_error(
            self.style_grams[layer], gram) / 4.0
    tf.summary.scalar('style_loss', self.style_loss)

    # Total variation denoising,
    # see https://en.wikipedia.org/wiki/Total_variation_denoising
    self.total_variation_loss = tf.losses.mean_squared_error(
        output_image[:, 1:, :, :], output_image[:, :-1, :, :]) + \
        tf.losses.mean_squared_error(
            output_image[:, :, 1:, :], output_image[:, :, :-1, :])
    tf.summary.scalar('total_variation_loss', self.total_variation_loss)

    self.loss = self.opts.content_loss_weight * self.content_loss \
        + self.opts.style_loss_weight * self.style_loss \
        + self.opts.total_variation_loss_weight * self.total_variation_loss
    tf.summary.scalar('loss', self.loss)

    self.output_image = output_image
def main(args):
    """Fine-tune VGG-16/19 on the FER2013 folders and report test accuracy.

    Args:
        args: namespace providing ``network_model`` ("vgg16" selects VGG-16,
            anything else VGG-19), ``fine_tuning`` (forwarded to the network
            constructor; "all" selects plain gradient descent, anything else
            Adam) and ``train_step`` (number of training iterations).
    """
    train_data = 'expression/fer2013-train'
    test_data = 'expression/fer2013-test'
    train_img, train_label = load_image(train_data)
    test_img, test_label = load_image(test_data)

    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    label = tf.placeholder(tf.float32, [None, 7])

    if args.network_model == "vgg16":
        vgg = vgg16.Vgg16(args.fine_tuning)
    else:
        vgg = vgg19.Vgg19(args.fine_tuning)
    predict = vgg.build(images)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=predict))

    # fine_tuning == 'all': GradientDescent optimizes all variables.
    # Otherwise Adam is used; per the original comment only the fully
    # connected layers are optimized in that case.
    if args.fine_tuning == "all":
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=0.001).minimize(cross_entropy)
    else:
        optimizer = tf.train.AdamOptimizer(
            learning_rate=0.001).minimize(cross_entropy)

    correct = tf.equal(tf.argmax(tf.nn.softmax(predict), 1),
                       tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # tf.global_variables() replaces the deprecated tf.all_variables().
        saver = tf.train.Saver(tf.global_variables())

        train = next_batch(train_img, train_label, 32)
        for i in range(args.train_step):
            # The next() builtin works on Python 2 and 3; the generator's
            # .next() method exists on Python 2 only.
            x_batch, y_batch = next(train)
            loss, _, acc = sess.run([cross_entropy, optimizer, accuracy],
                                    feed_dict={images: x_batch,
                                               label: y_batch})
            if i % 10 == 0:
                saver.save(sess, 'save_variables/vgg.module', global_step=i)
                print('number %d loss is %f' % (i, loss))
                print('number %d accuracy is %f' % (i, acc))

        # Average the accuracy over 100 test batches.
        test_accuracy = 0
        test = next_batch(test_img, test_label, 32)
        for j in range(100):
            x_batch, y_batch = next(test)
            acc = sess.run(accuracy,
                           feed_dict={images: x_batch, label: y_batch})
            test_accuracy += acc
        print('test accuracy is %f' % (test_accuracy / 100))
def vgg19_module(imageStack, sess):
    """Return the VGG-19 ``conv3_1`` activations for a stack of images.

    Args:
        imageStack: images convertible by ``np.array``; fed to a
            ``[None, 160, 160, 3]`` float placeholder.
        sess: session used to evaluate the layer.

    Note: every call creates a new placeholder and a new network.
    """
    image_input = tf.placeholder("float", [None, 160, 160, 3])
    feed = {image_input: np.array(imageStack)}

    network = vgg19.Vgg19()
    with tf.name_scope("content_vgg"):
        network.build(image_input)

    return sess.run(network.conv3_1, feed_dict=feed)
def make_examples(path, num_examples):
    """Make examples for training of the restoration net.

    For each of the first ``num_examples`` images listed under ``path``,
    saves ``test_x<i>.npy`` (an object array holding the conv activations,
    conv spatial sizes and the second element of each pool output) and
    ``test_y<i>.npy`` (the channel-reversed, mean-subtracted image batch).
    """
    filenames = list_images(path, True)
    for i in range(num_examples):
        image_path = filenames[i]
        pixels = np.array(Image.open(image_path)).astype(np.float32)

        # Reverse the channel order and subtract VGG_MEAN per channel.
        height, width, channels = (pixels.shape[0], pixels.shape[1],
                                   pixels.shape[2])
        im_dim = np.zeros([1, height, width, channels]).astype(np.float32)
        for dst, src in enumerate((2, 1, 0)):
            im_dim[0, :, :, dst] = pixels[:, :, src] - VGG_MEAN[dst]

        tf.reset_default_graph()
        with tf.Session() as sess:
            bgr = tf.stack(im_dim)
            vgg = vgg19.Vgg19()
            vgg.build(bgr)

            convs = [vgg.conv5_1, vgg.conv4_1, vgg.conv3_1,
                     vgg.conv2_1, vgg.conv1_1]
            pools = [vgg.pool4, vgg.pool3, vgg.pool2, vgg.pool1]

            # Convolution layers.
            features_convs = np.zeros(5, dtype=object)
            for slot, conv in enumerate(convs):
                features_convs[slot] = sess.run(conv)

            # Pooling layers: spatial size of the next conv output plus the
            # second element of each pool output.
            features_dims = np.zeros(4, dtype=object)
            features_ind = np.zeros(4, dtype=object)
            for slot, pool in enumerate(pools):
                following = features_convs[slot + 1]
                features_dims[slot] = [following.shape[1],
                                       following.shape[2]]
                features_ind[slot] = sess.run(pool[1])

            features = np.zeros(3, dtype=object)
            features[0] = features_convs
            features[1] = features_dims
            features[2] = features_ind

        np.save("test_x" + str(i), features)
        np.save("test_y" + str(i), im_dim)
def load_model(name, path):
    """Create the non-trainable, non-skippable VGG model selected by ``name``.

    Args:
        name: ``"vgg19"`` or ``"vgg16"``.
        path: forwarded as the second constructor argument.

    Returns:
        The constructed ``vgg19.Vgg19`` or ``vgg16.Vgg16`` instance.

    Raises:
        ValueError: if ``name`` is neither supported value. Previously this
            surfaced as an ``UnboundLocalError`` on the ``return`` line.
    """
    if name == "vgg19":
        return vgg19.Vgg19(get_data_mean(), path, trainable=False,
                           skippable=False)
    if name == "vgg16":
        return vgg16.Vgg16(get_data_mean(), path, trainable=False,
                           skippable=False)
    raise ValueError(
        "unknown model name {!r}; expected 'vgg19' or 'vgg16'".format(name))
def __init__(self, config):
    """Load both images, compute their fixed targets and build the optimiser.

    Args:
        config: object providing ``content_path``, ``style_path``,
            ``vgg_path`` and ``content_layer``.
    """
    self.sess = tf.Session()
    self.config = config
    vgg_path = self.config.vgg_path

    self.content_img_bgr, self.content_img_yuv = self.load_image(
        self.config.content_path)
    self.style_img_bgr, _ = self.load_image(self.config.style_path)

    self.content_input = tf.placeholder(tf.float32,
                                        self.content_img_bgr.shape)
    self.style_input = tf.placeholder(tf.float32, self.style_img_bgr.shape)

    self.content_vgg = vgg19.Vgg19(vgg_path)
    self.content_vgg.build(self.content_input)
    self.style_vgg = vgg19.Vgg19(vgg_path)
    self.style_vgg.build(self.style_input)

    self.sess.run(tf.global_variables_initializer())

    # Note: the two representations below only need to be computed once.
    content_layer = getattr(self.content_vgg, self.config.content_layer)
    self.content_rep = self.sess.run(
        content_layer,
        feed_dict={self.content_input: self.content_img_bgr})
    style_tensor = self.get_style_rep(self.style_vgg)
    self.style_rep = self.sess.run(
        style_tensor, feed_dict={self.style_input: self.style_img_bgr})

    # Start from white noise (the noise is a variable).
    noise_stddev = 0.1 * np.std(self.content_img_bgr)
    self.noise = tf.Variable(
        tf.truncated_normal(self.content_img_bgr.shape,
                            stddev=noise_stddev))
    self.noise_vgg = vgg19.Vgg19(vgg_path)
    self.noise_vgg.build(self.noise)

    # Bare attribute reads, kept from the original; presumably these are
    # lazily-built properties that add ops to the graph — verify.
    self.content_loss
    self.style_loss
    self.optimize

    self.sess.run(tf.global_variables_initializer())
def build_model(self, batch_size):
    """Create the paired-image placeholders, VGG features and the loss.

    The four image batches are concatenated along axis 0 as
    ``[images1, contour1, images2, contour2]``, pushed through one VGG-19,
    and each conv layer's output is split back into the four parts to
    compare image1/image2 and contour1/contour2.

    Args:
        batch_size: accepted but not read here; ``self.batch_size`` is used.
    """
    image_shape = [self.batch_size, self.input_height, self.input_width, 3]

    self.y = tf.placeholder(tf.float32, [self.batch_size, 1], name='y')
    self.images1 = tf.placeholder(tf.float32, image_shape, name='images1')
    self.images2 = tf.placeholder(tf.float32, image_shape, name='images2')
    self.contour1 = tf.placeholder(tf.float32, image_shape,
                                   name='contour1')
    self.contour2 = tf.placeholder(tf.float32, image_shape,
                                   name='contour2')

    summaries = (
        ('paired_image1', self.images1),
        ('paired_image2', self.images2),
        ('paired_contour1', self.contour1),
        ('paired_contour2', self.contour2),
    )
    for tag, tensor in summaries:
        tf.summary.image(tag, tensor)

    self.images = tf.concat(
        [self.images1, self.contour1, self.images2, self.contour2], axis=0)

    vgg = vgg19.Vgg19()
    with tf.name_scope("content_vgg"):
        vgg.build(self.images)

    # conv1_1 ... conv5_4, in network order.
    convs_per_block = ((1, 2), (2, 2), (3, 4), (4, 4), (5, 4))
    features = [
        getattr(vgg, 'conv{}_{}'.format(block, index))
        for block, depth in convs_per_block
        for index in range(1, depth + 1)
    ]

    isimilarities = []
    csimilarities = []
    for level, feature in enumerate(features):
        first_pair, second_pair = tf.split(feature, 2, 0)
        image_a, contour_a = tf.split(first_pair, 2, 0)
        image_b, contour_b = tf.split(second_pair, 2, 0)
        isimilarities.append(
            self.calculateSimilarity(image_a, image_b, level))
        csimilarities.append(
            self.calculateSimilarity(contour_a, contour_b, level))

    similarities = isimilarities + csimilarities
    self.similarity_logits = self.similarity_network(similarities)
    self.loss = self.losscalculate(self.similarity_logits, self.y)
def __init__(self):
    """Build a VGG-19 on a ``[1, 224, 224, 3]`` placeholder and index its convs.

    ``self.featrue_dict`` (attribute name kept as spelled in the original)
    maps each conv layer name, ``conv1_1`` through ``conv5_4``, to the
    corresponding attribute of ``self.vgg``.
    """
    self.vgg = vgg19.Vgg19()
    self.sess = tf.Session()
    self.vgg_placeholder = tf.placeholder("float", [1, 224, 224, 3])
    self.vgg.build(self.vgg_placeholder, 0)

    layer_names = (
        "conv1_1", "conv1_2",
        "conv2_1", "conv2_2",
        "conv3_1", "conv3_2", "conv3_3", "conv3_4",
        "conv4_1", "conv4_2", "conv4_3", "conv4_4",
        "conv5_1", "conv5_2", "conv5_3", "conv5_4",
    )
    self.featrue_dict = {name: getattr(self.vgg, name)
                         for name in layer_names}
    print("CNNfeature inited!")
def __init__(self, config):
    """Build the content, style and transform-output VGGs and the training ops.

    Args:
        config: object providing ``style_path``, ``batch_shape``,
            ``batch_size``, ``vgg_path``, ``num_channels``, ``noise_shape``
            and ``content_layer``.
    """
    self.sess = tf.Session()
    self.config = config
    vgg_path = self.config.vgg_path

    # The style is fixed, so the style image can be loaded in __init__.
    self.style_img_bgr, _ = self.load_image(self.config.style_path)

    # Content VGGs; note that the shape includes the batch size.
    self.content_input = tf.placeholder(tf.float32,
                                        self.config.batch_shape)
    self.content_vggs = []
    for i in range(self.config.batch_size):
        content_vgg = vgg19.Vgg19(vgg_path)
        self.content_vggs.append(content_vgg)
        content_vgg.build(self.content_input[i])

    # Style VGG. Note: the style shape does not follow the config.
    self.style_input = tf.placeholder(tf.float32, self.style_img_bgr.shape)
    self.style_vgg = vgg19.Vgg19(vgg_path)
    self.style_vgg.build(self.style_input)

    # Noise input and the VGG over the transform network's output.
    self.noise_input = tf.placeholder(
        tf.float32, [1, None, None, self.config.num_channels])
    noise_tensor = tf.truncated_normal(self.config.noise_shape,
                                       stddev=0.001)
    self.noise = self.sess.run(noise_tensor)
    self.trans_net_output_vgg = vgg19.Vgg19(vgg_path)
    self.trans_net_output_vgg.build(self.trans_net_output)

    self.sess.run(tf.global_variables_initializer())

    # Content and style representations: content representation for the
    # content images, style representation for the style image, and both
    # for the transform network's output.
    self.content_reps = [
        getattr(content_vgg, self.config.content_layer)
        for content_vgg in self.content_vggs
    ]
    # Note: the style image's representation only needs computing once.
    self.style_rep = self.sess.run(
        self.get_style_rep(self.style_vgg),
        feed_dict={self.style_input: self.style_img_bgr})
    self.noise_content_rep = getattr(self.trans_net_output_vgg,
                                     self.config.content_layer)
    self.noise_style_rep = self.get_style_rep(self.trans_net_output_vgg)

    # Bare attribute reads, kept from the original; presumably these are
    # lazily-built properties that add ops to the graph — verify.
    self.content_loss
    self.style_loss
    self.optimize

    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
def make_features(im_path):
    """Return image VGG's features.

    Returns:
        A 3-element object array: the five conv activations (``conv5_1``
        down to ``conv1_1``), the ``[height, width]`` of the conv output
        following each pool, and the second element of each pool output
        (``pool4`` down to ``pool1``).
    """
    picture = Image.open(im_path)
    # Scale factor 1: the size is unchanged, the resize call is kept as is.
    same_size = (int(picture.size[0] / 1), int(picture.size[1] / 1))
    picture = picture.resize(same_size, Image.ANTIALIAS)
    pixels = np.array(picture).astype(np.float32)

    # Reverse the channel order and subtract VGG_MEAN per channel.
    im_dim = np.zeros(
        [1, pixels.shape[0], pixels.shape[1], pixels.shape[2]]
    ).astype(np.float32)
    for dst, src in enumerate((2, 1, 0)):
        im_dim[0, :, :, dst] = pixels[:, :, src] - VGG_MEAN[dst]

    tf.reset_default_graph()
    with tf.Session() as sess:
        bgr = tf.stack(im_dim)
        vgg = vgg19.Vgg19()
        vgg.build(bgr)

        convs = [vgg.conv5_1, vgg.conv4_1, vgg.conv3_1,
                 vgg.conv2_1, vgg.conv1_1]
        pools = [vgg.pool4, vgg.pool3, vgg.pool2, vgg.pool1]

        # Convolution layers.
        features_convs = np.zeros(5, dtype=object)
        for slot, conv in enumerate(convs):
            features_convs[slot] = sess.run(conv)

        # Pooling layers.
        features_dims = np.zeros(4, dtype=object)
        features_ind = np.zeros(4, dtype=object)
        for slot, pool in enumerate(pools):
            following = features_convs[slot + 1]
            features_dims[slot] = [following.shape[1], following.shape[2]]
            features_ind[slot] = sess.run(pool[1])

        features = np.zeros(3, dtype=object)
        features[0] = features_convs
        features[1] = features_dims
        features[2] = features_ind

    return features
def semantic_loss_with_attention(
        real, fake, batch_size,
        vgg_npy_path='/home/benjamin/Workspace/ml/I19tModel/vgg19.npy'):
    """Return the L1 loss between VGG-19 ``conv3_3`` features of two images.

    ``fake`` is first blended with ``real`` using the centre mask, so only
    the masked region of ``fake`` contributes:
    ``mask * fake + (1 - mask) * real``.

    Args:
        real: reference image tensor.
        fake: generated image tensor; ``fake.shape[2]`` sizes the mask.
        batch_size: forwarded to ``get_centre_mask_tensor``.
        vgg_npy_path: VGG-19 weights file. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        ``L1_loss(real_features, fake_features)`` on the pre-activation
        ``conv3_3`` feature maps.
    """
    vgg = vgg19.Vgg19(vgg_npy_path)

    vgg.build(real)
    real_feature_map = vgg.conv3_3_no_activation

    mask_tensor = get_centre_mask_tensor(int(fake.shape[2]), batch_size)
    print("mask_tensor.shape = ", mask_tensor.shape)
    fake_masked = (tf.multiply(mask_tensor, fake)
                   + tf.multiply((1 - mask_tensor), real))

    # The same Vgg19 object is built a second time; the real feature map
    # was captured above, before its attributes are rebound.
    vgg.build(fake_masked)
    fake_feature_map = vgg.conv3_3_no_activation

    loss = L1_loss(real_feature_map, fake_feature_map)
    return loss
def preprocess_style(opts):
    """Compute the style image's Gram matrix for every style layer.

    Args:
        opts: object providing ``vgg19_npy_path``, ``style_image_path`` and
            ``style_layers``.

    Returns:
        Dict mapping layer name to its Gram matrix (divided by the feature
        map's size) with a leading axis of length 1 added.
    """
    grams_by_layer = {}
    with tf.Graph().as_default():
        net = vgg19.Vgg19(opts.vgg19_npy_path)
        style_batch = np.expand_dims(imread(opts.style_image_path), 0)
        net.build(style_batch, sub_mean=True)

        with tf.Session() as sess:
            for name in opts.style_layers:
                activations = sess.run(net.end_points[name])
                # Flatten to (positions, channels) before the outer product.
                flat = np.reshape(activations, (-1, activations.shape[3]))
                gram = np.matmul(flat.T, flat)
                gram /= flat.size
                gram = np.expand_dims(gram, 0)
                grams_by_layer[name] = gram
                tf.logging.info("layer {} gram matrix shape: {}".format(
                    name, gram.shape))
    return grams_by_layer
def vgg19_pretrained(image):
    """Return the batched ``conv3_1`` and ``conv5_1`` activations of one image.

    Args:
        image: array whose ``shape`` is exactly ``(160, 160, 3)``.

    Returns:
        ``(conv3_1, conv5_1)``; both keep the leading batch axis of 1.

    Raises:
        AssertionError: if ``image.shape`` is not ``(160, 160, 3)``.
    """
    assert image.shape == (160, 160, 3)
    batch = np.array(image[np.newaxis, :, :, :])

    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    with tf.Session(config=session_config) as sess:
        images = tf.placeholder("float", [1, 160, 160, 3])
        vgg = vgg19.Vgg19()
        with tf.name_scope("content_vgg"):
            vgg.build(images)

        # Fetch both layers in one run so the network is evaluated once
        # instead of once per layer.
        conv3_1, conv5_1 = sess.run([vgg.conv3_1, vgg.conv5_1],
                                    feed_dict={images: batch})
        return conv3_1, conv5_1
def run(args):
    """Evaluate one VGG-19 layer over every batch of the input dataset.

    Batches come from ``extractor_input_fn(args.data_path, args)``; the
    layer named by ``args.feature_layer`` is evaluated until the iterator
    raises ``tf.errors.OutOfRangeError``. Timing and shape are printed per
    batch.
    """
    dataset = extractor_input_fn(args.data_path, args)
    iterator = dataset.make_initializable_iterator()
    batch = iterator.get_next()['data']

    net = vgg19.Vgg19()
    net.build(batch)

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        while True:
            started = time.time()
            try:
                features = sess.run(vars(net)[args.feature_layer])
            except tf.errors.OutOfRangeError:
                break
            elapsed_ms = 1000.0 * (time.time() - started)
            print(("Batch time: {} ms".format(elapsed_ms)))
            print("layer '{}', shape '{}'".format(args.feature_layer,
                                                  features.shape))
def build_graph(self):
    """Build the trainable image, the fixed targets, the losses and the train op.

    Sets ``self.x`` (trainable image), fills ``self.content_feature`` and
    ``self.style_feature`` (which must already be dict-like), and defines
    ``self.l_content``, ``self.l_style``, ``self.l_total`` and
    ``self.train_step``. Reads the module-level ``alpha``, ``beta`` and
    ``lr``.
    """
    # The variable that will be trained.
    init_image = np.random.normal(size=self.content.shape,
                                  scale=np.std(self.content))
    self.x = tf.Variable(init_image, trainable=True, dtype=tf.float32)

    # Content features, evaluated in a throw-away graph.
    with tf.Graph().as_default():
        self.input_content = tf.placeholder(tf.float32,
                                            shape=self.content.shape)
        net = vgg19.Vgg19(self.input_content).get_all_layers()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for layer in CONTENT_LAYERS:
                featuremap = sess.run(
                    net[layer],
                    feed_dict={self.input_content: self.content})
                self.content_feature[layer] = featuremap

    # Style features (Gram matrices), evaluated in a throw-away graph.
    with tf.Graph().as_default():
        self.input_style = tf.placeholder(tf.float32,
                                          shape=self.style.shape)
        net = vgg19.Vgg19(self.input_style).get_all_layers()
        with tf.Session() as sess:
            for layer in STYLE_LAYERS:
                featuremap = net[layer]
                gram = self.gram_matrix(featuremap)
                self.style_feature[layer] = sess.run(
                    gram, feed_dict={self.input_style: self.style})

    # Network over the generated image.
    self.gen_image_net = vgg19.Vgg19(self.x).get_all_layers()

    # Loss definition.
    self.l_content = 0
    self.l_style = 0
    self.l_total = 0

    # Float literals on purpose: ``1 / len(...)`` is integer division on
    # Python 2 and yields 0 as soon as there is more than one layer.
    content_weight = 1.0 / len(CONTENT_LAYERS)
    for layer in CONTENT_LAYERS:
        self.l_content += content_weight * tf.reduce_sum(
            tf.pow(
                (self.gen_image_net[layer] - self.content_feature[layer]),
                2)) / 2

    style_weight = 1.0 / len(STYLE_LAYERS)
    for layer in STYLE_LAYERS:
        _, height, width, dim = self.gen_image_net[layer].get_shape()
        N = height.value * width.value
        M = dim.value
        gram_style = self.style_feature[layer]
        gram_gen = self.gram_matrix(self.gen_image_net[layer])
        self.l_style += style_weight * (1. / (4 * N**2 * M**2)) * \
            tf.reduce_sum(tf.pow((gram_gen - gram_style), 2))

    self.l_total = alpha * self.l_content + beta * self.l_style
    self.train_step = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        self.l_total)
# Command-line options (``parser`` and ``data_dir`` are created before this
# fragment).
parser.add_argument('--content',
                    default=data_dir + '/overfit/COCO_val2014_000000000074.jpg',
                    type=str, help='Content image.')
parser.add_argument('--style',
                    default=data_dir + '/paintings/edvard_munch-the_scream.jpg',
                    type=str, help='Style image.')
parser.add_argument('--image_size', default=256, type=int,
                    help='Input image size.')
parser.add_argument('--ratio', default=4, type=int,
                    help='Ratio between encoding and decoding')
parser.add_argument('--nb_res_layer', default=5, type=int,
                    help='Number of residual layer.')
args = parser.parse_args()

# NOTE(review): ``dir`` shadows the builtin of the same name.
dir = os.path.dirname(os.path.realpath(__file__))
results_dir = dir + '/data/st'

# Square inputs of side ``--image_size`` with 3 channels; batch size open.
input_shape = [None, args.image_size, args.image_size, 3]
fst = fast_style_transfer(input_shape, ratio=args.ratio,
                          nb_res_layer=args.nb_res_layer)
tf.image_summary('input_img', fst['input'], 2)
tf.image_summary('output_img', fst['output'], 2)

vgg = vgg19.Vgg19()

# NOTE(review): the three losses are the constant 1 here, which looks like
# placeholder values — confirm before relying on the summaries below.
style_loss = 1
content_loss = 1
tv_loss = 1
tf.scalar_summary('style_loss', style_loss)
tf.scalar_summary('content_loss', content_loss)
tf.scalar_summary('tv_loss', tv_loss)

# NOTE(review): the optimizer is spelled ``tf.AdamOptimizer`` here while other
# TensorFlow code uses ``tf.train.AdamOptimizer`` — confirm it resolves. Also,
# ``total_loss`` is not assigned anywhere in this visible fragment.
adam = tf.AdamOptimizer(1e-3)
train_op = adam.minimize(total_loss)

# Draw random integer style/content coefficients on each of 10 iterations.
for i in range(10):
    style_coef = np.random.random_integers(50,150)
    content_coef = np.random.random_integers(5,10)
# lmbd1, lmbd2 : l1, l2 norm reg. constants for proximal gradient respectively lmbd1 = lmbd2 = 1e-5 # number of steps to train num_steps = 100000 # create and train the model with tf.Session() as sess: spatial_video = tf.placeholder(tf.float32, [None, 224, 224, 3]) stacked_flow = tf.placeholder(tf.float32, [None, 224, 224, L]) labels = tf.placeholder(tf.int32, [1]) num_spatial_frames = tf.shape(spatial_video)[0] num_flow_stacks = tf.shape(stacked_flow)[0] # build VGG19 vgg = vgg19.Vgg19("vgg19.npy") vgg.build(spatial_video) vgg_fc = tf.reshape(vgg.relu6, [1, num_spatial_frames, 4096]) # build spatial LSTM network with tf.variable_scope("spatial_lstm"): lstm_stack = tf.contrib.rnn.MultiRNNCell([ tf.contrib.rnn.BasicLSTMCell(1024, state_is_tuple=True), tf.contrib.rnn.BasicLSTMCell(512, state_is_tuple=True) ], state_is_tuple=True) spatial_lstm = tf.nn.dynamic_rnn(lstm_stack, vgg_fc, dtype=tf.float32, time_major=False) # method 1: aggregate all time frames
# Script: run two test images through VGG-19 and print the class
# probabilities for each.
import numpy as np
import tensorflow as tf
import vgg19
import utils

img1 = utils.load_image("./test_data/tiger.jpeg")
img2 = utils.load_image("./test_data/puzzle.jpeg")

# Give each image a leading batch axis, then stack them into one batch of 2.
batch1 = img1.reshape((1, 224, 224, 3))
batch2 = img2.reshape((1, 224, 224, 3))
batch = np.concatenate((batch1, batch2), 0)

with tf.Session() as sess:
    images = tf.placeholder("float", [2, 224, 224, 3])
    feed_dict = {images: batch}

    vgg = vgg19.Vgg19("./vgg19.npy")
    with tf.name_scope("content_vgg"):
        vgg.build(images)

    # One row of ``prob`` per input image.
    prob = sess.run(vgg.prob, feed_dict=feed_dict)
    print(prob)
    utils.print_prob(prob[0], './synset.txt')
    utils.print_prob(prob[1], './synset.txt')
def main():
    """Build the training graph and run the train/validate/checkpoint loop.

    Graph:
      * ``out_image = network(in_image)``.
      * ``structure_loss``: mean multi-scale SSIM plus mean SSIM between the
        output and the ground truth; it enters the objective as
        ``2 - structure_loss``.
      * ``context_loss``: norm of the difference between the VGG19 ``conv3_4``
        features of the first batch element of the ground truth and of the
        output.
      * ``region_loss``: per-pixel norm of the output error, summed under the
        fed low/high masks, divided by the fed counts and weighted by
        ``WEIGHT_LOW`` / ``WEIGHT_HIGH``.

    Training:
      * Restores the latest checkpoint from ``CHECKPOINT_DIR`` when one exists
        and resumes after the highest all-digit sub-folder of ``RESULT_DIR``.
      * Every epoch multiplies the learning rate by ``DECAY_RATE``, shuffles
        the training ids, and processes them in groups of ``GROUP_NUM`` (plus
        one trailing remainder group). The in-memory data caches are reset for
        every group.
      * After every epoch the validation files are evaluated and a checkpoint
        is written; on epochs divisible by ``SAVE_FREQ`` sample videos and an
        extra checkpoint go to ``RESULT_DIR/<epoch>/``.

    All ``print`` calls use the single-argument form so the function parses
    under both Python 2 and Python 3 while producing the same Python 2 output
    as before.
    """
    sess = tf.Session()
    in_image = tf.placeholder(tf.float32, [None, CROP_FRAME, CROP_HEIGHT, CROP_WIDTH, 1])
    gt_image = tf.placeholder(tf.float32, [None, CROP_FRAME, CROP_HEIGHT, CROP_WIDTH, 3])
    out_image = network(in_image)
    print(out_image.shape)

    # loss function
    structure_loss = tf.reduce_mean(tf.image.ssim_multiscale(out_image, gt_image, 1.0)) + tf.reduce_mean(tf.image.ssim(out_image, gt_image, 1.0))

    # VGG19 features are computed on the first element of the batch only.
    vgg_gt = vgg19.Vgg19()
    with tf.name_scope("content_vgg_gt"):
        vgg_gt.build(gt_image[0, :, :, :, :])
    fm1 = vgg_gt.conv3_4

    vgg_out = vgg19.Vgg19()
    with tf.name_scope("content_vgg_out"):
        vgg_out.build(out_image[0, :, :, :, :])
    fm2 = vgg_out.conv3_4

    context_loss = tf.reduce_mean(tf.norm(fm1 - fm2))

    mask_low = tf.placeholder(tf.float32, [None, None, None, None])
    mask_high = tf.placeholder(tf.float32, [None, None, None, None])
    num_low = tf.placeholder(tf.float32, [None])
    num_high = tf.placeholder(tf.float32, [None])

    # Norm over the channel axis gives one error value per pixel.
    diff = tf.norm(out_image - gt_image, axis=-1)
    region_loss = tf.reduce_mean(WEIGHT_LOW * tf.reduce_sum(diff * mask_low, [1, 2, 3]) / num_low + WEIGHT_HIGH * tf.reduce_sum(diff * mask_high, [1, 2, 3]) / num_high)

    G_loss = 2 - structure_loss + context_loss_weight * context_loss + region_loss

    # `v_loss` is a scalar fed from Python so that one summary op can be
    # reused to log several different loss values.
    v_loss = tf.placeholder(tf.float32)
    lr = tf.placeholder(tf.float32)
    G_opt = tf.train.AdamOptimizer(learning_rate=lr).minimize(G_loss)

    loss_scalar = tf.summary.scalar('loss', v_loss)
    learning_rate_scalar = tf.summary.scalar('learning_rate', lr)

    # One writer per curve, each in its own log directory.
    base_log_dir = './logs/train_loss'
    writer_train_loss = get_writer(base_log_dir)
    base_log_dir = './logs/structure_loss'
    writer_structure_loss = get_writer(base_log_dir)
    base_log_dir = './logs/context_loss'
    writer_context_loss = get_writer(base_log_dir)
    base_log_dir = './logs/region_loss'
    writer_region_loss = get_writer(base_log_dir)
    base_log_dir = './logs/val'
    writer_val = get_writer(base_log_dir)
    base_log_dir = './logs/lr'
    writer_lr = get_writer(base_log_dir)

    count = 0  # global step used for all summaries
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
    if ckpt:
        print('loaded ' + ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    g_loss = np.zeros((len(train_ids), 1))

    # Resume after the highest epoch folder (all-digit name) in RESULT_DIR.
    lastepoch = 0
    if not os.path.isdir(RESULT_DIR):
        os.makedirs(RESULT_DIR)
    else:
        all_items = glob.glob(os.path.join(RESULT_DIR, '*'))
        all_folders = [os.path.basename(d) for d in all_items if os.path.isdir(d) and os.path.basename(d).isdigit()]
        for folder in all_folders:
            lastepoch = np.maximum(lastepoch, int(folder))

    # NOTE(review): when resuming (lastepoch > 0) the decay restarts from
    # INIT_LR rather than from the rate reached at `lastepoch` -- confirm
    # this is intended.
    learning_rate = INIT_LR

    np.random.seed(ord('c') + 137)
    losses = [0] * len(valid_in_files)

    for epoch in range(lastepoch + 1, MAX_EPOCH + 1):
        e_st = time.time()
        if epoch % SAVE_FREQ == 0:
            save_results = True
            if not os.path.isdir(RESULT_DIR + '%04d' % epoch):
                os.makedirs(RESULT_DIR + '%04d' % epoch)
        else:
            save_results = False
        cnt = 0

        learning_rate *= DECAY_RATE
        print('[INFO] learning rate:' + ' ' + str(learning_rate))

        N = len(train_ids)
        all_order = np.random.permutation(N)
        # Full groups of GROUP_NUM ids, followed by one (possibly empty)
        # remainder group. np.split cannot produce zero sections, so the case
        # of fewer ids than GROUP_NUM is handled without calling it.
        num_full_groups = N // GROUP_NUM
        last_group = num_full_groups * GROUP_NUM
        if num_full_groups:
            split_order = np.split(all_order[:last_group], num_full_groups)
        else:
            split_order = []
        split_order.append(all_order[last_group:])

        for order in split_order:
            # Raw data takes long time to load. Keep them in memory after
            # loaded -- but only for the current group: the caches are reset
            # here for every group.
            gt_images = [None] * len(train_ids)
            input_images = [None] * len(train_ids)
            demosaiced_images = [None] * len(train_ids)

            # Pair every id of the group with every start frame that is a
            # multiple of FRAME_FREQ, then visit the pairs in random order.
            order_frame = [(one, y) for y in [t for t in np.random.permutation(ALL_FRAME - CROP_FRAME) if t % FRAME_FREQ == 0] for one in order]
            index = np.random.permutation(len(order_frame))
            for idx in index:
                # get the path from image id
                ind, start_frame = order_frame[idx]
                # Jitter the start frame and keep the crop inside the clip.
                start_frame += np.random.randint(FRAME_FREQ)
                if start_frame + CROP_FRAME > ALL_FRAME:
                    start_frame = ALL_FRAME - CROP_FRAME
                train_id = train_ids[ind] + '_start_frame_' + str(start_frame)
                in_path = in_files[ind]
                gt_path = gt_files[ind]

                st = time.time()
                cnt += 1

                # Each loader below skips the sample on MemoryError instead
                # of aborting the whole run.
                if input_images[ind] is None:
                    try:
                        read_in = LOAD_TRAIN_FUNC(in_path)
                        input_images[ind] = np.expand_dims(unpack(read_in) / 65535.0, axis=0)
                    except MemoryError as e:
                        print(e)
                        print(train_id, in_path)
                        print('!!!train')
                        continue
                raw = input_images[ind]

                if demosaiced_images[ind] is None:
                    try:
                        demosaiced_images[ind] = (demosaic(input_images[ind]) * 65535.0).astype('uint16')
                    except MemoryError as e:
                        print(e)
                        print(train_id, in_path)
                        print('!!!demosaic')
                        continue
                demosaiced = demosaiced_images[ind]

                if gt_images[ind] is None:
                    try:
                        gt_images[ind] = np.expand_dims(np.float32(LOAD_GT_FUNC(gt_path) / 255.0), axis=0)
                    except MemoryError as e:
                        print(e)
                        print(train_id, gt_path)
                        print('!!!gt')
                        continue
                gt_raw = gt_images[ind]

                # Unpacking all five dimensions also checks that `raw` is 5-D.
                B, F, H, W, C = raw.shape
                input_patch, demosaiced, gt_patch = crop(raw, demosaiced, gt_raw, H, W, start_frame)
                input_patch, demosaiced, gt_patch = flip(input_patch, demosaiced, gt_patch)
                mask_l, mask_h, num_l, num_h = get_low_light_area(demosaiced)
                input_patch = np.minimum(input_patch, 1.0)

                _, G_current, output, sl, cl, rl = sess.run(
                    [G_opt, G_loss, out_image, structure_loss, context_loss, region_loss],
                    feed_dict={in_image: input_patch, gt_image: gt_patch, mask_low: mask_l, mask_high: mask_h, num_low: num_l, num_high: num_h, lr: learning_rate})
                output = np.minimum(np.maximum(output, 0), 1)
                g_loss[ind] = G_current

                # Log the total loss and each of its three components as they
                # appear in G_loss.
                summary_loss = sess.run(loss_scalar, feed_dict={v_loss: G_current})
                writer_train_loss.add_summary(summary_loss, count)
                summary_structure_loss = sess.run(loss_scalar, feed_dict={v_loss: (2 - sl)})
                writer_structure_loss.add_summary(summary_structure_loss, count)
                summary_context_loss = sess.run(loss_scalar, feed_dict={v_loss: (context_loss_weight * cl)})
                writer_context_loss.add_summary(summary_context_loss, count)
                summary_region_loss = sess.run(loss_scalar, feed_dict={v_loss: rl})
                writer_region_loss.add_summary(summary_region_loss, count)
                count += 1

                if save_results and start_frame in SAVE_FRAMES:
                    # Ground truth and output joined along axis 2 of the
                    # clip, both reversed along axis 1.
                    temp = np.concatenate((gt_patch[0, :, ::-1, :, :], output[0, :, ::-1, :, :]), axis=2)
                    try:
                        vwrite((RESULT_DIR + '%04d/%s_train.avi' % (epoch, train_id)), (temp * 255).astype('uint8'))
                    except OSError as e:
                        print('\t', e, 'Skip saving.')

                # Running mean covers only the samples with a non-zero
                # recorded loss.
                progress = "%d %d Loss=%.8f Time=%.3f (avg:%.3f)" % (epoch, cnt, np.mean(g_loss[np.where(g_loss)]), time.time() - st, (time.time() - e_st) / cnt)
                print(progress + ' ' + train_id)

        # validation after each epoch
        v_start = time.time()
        for i in range(len(valid_in_files)):
            in_path = valid_in_files[i]
            gt_path = valid_gt_files[i]
            loss = validate(in_path, gt_path, sess, G_loss, out_image, in_image, gt_image, mask_low, mask_high, num_low, num_high)
            if DEBUG:
                print(loss)
            losses[i] = loss
        summary_lr, summary_val = sess.run([learning_rate_scalar, loss_scalar], feed_dict={lr: learning_rate, v_loss: np.mean(losses)})
        writer_val.add_summary(summary_val, count)
        writer_lr.add_summary(summary_lr, count)
        print('validation: Loss={:.8f} Time={:.3f}s'.format(np.mean(losses), time.time() - v_start))

        # Always refresh the rolling checkpoint; keep a per-epoch copy on
        # save epochs.
        saver.save(sess, CHECKPOINT_DIR + 'model.ckpt')
        if save_results:
            saver.save(sess, RESULT_DIR + '%04d/' % epoch + 'model.ckpt')
tf.reset_default_graph() #Number of training images m = 64 #Define placeholders X = tf.placeholder(tf.float32, [m, 224, 224, 9]) y = tf.placeholder(tf.float32, [m, 224, 224, 3]) is_training = tf.placeholder(tf.bool) #Define model output y_out_encoder, encoder_history = encoder(X) y_out = decoder(y_out_encoder, encoder_history) #Define vgg network vgg19_y_out = vgg19.Vgg19() vgg19_y = vgg19.Vgg19() #Build the vgg network vgg19_y_out.build(y_out) vgg19_y.build(y) #Define loss g_loss = tf.reduce_mean(tf.abs(y - y_out)) #Loss measured only for the template image vgg19_y_loss = tf.reduce_mean(tf.abs(vgg19_y.conv1_2 - vgg19_y_out.conv1_2)) + \ tf.reduce_mean(tf.abs(vgg19_y.conv2_2 - vgg19_y_out.conv2_2)) + \ tf.reduce_mean(tf.abs(vgg19_y.conv3_4 - vgg19_y_out.conv3_4)) + \