from glob import glob
from os import path

import cv2
import numpy as np
import tensorflow as tf
from tqdm import tqdm

# Note: import_images, import_graph, example_gallery and generator are project
# helpers that are not defined in this listing; plausible sketches of each are
# given alongside the scripts below.


# Preview script: read image batches from the TFRecord files and display them.
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*.tfrecord.gz'))
    max_reads = 50
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)

        coord = tf.train.Coordinator()
        with tf.Session(graph=graph) as sess:
            init = tf.group(tf.local_variables_initializer(),
                            tf.global_variables_initializer())
            sess.run(init)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                while not coord.should_stop():
                    Xs = sess.run(image_batch)
                    for img in Xs:
                        cv2.imshow(window, img)
                        # ESC quits the preview
                        if (cv2.waitKey(33) & 0xff) == 27:
                            coord.request_stop()
                            break
            except tf.errors.OutOfRangeError:
                print('Read all examples.')
            finally:
                coord.request_stop()
                coord.join(threads)

    cv2.destroyWindow(window)
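# import_images is not shown in this listing. A minimal sketch of what it
# might look like, assuming GZIP-compressed TFRecords with 'image' and 'type'
# features; the key names, dtypes, epoch count and the 180x320x3 frame shape
# are assumptions inferred from the calling code:
def import_images(tfrecord_file_names, max_reads=50, batch_size=50):
    filename_queue = tf.train.string_input_producer(tfrecord_file_names,
                                                    num_epochs=1)
    options = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP)
    reader = tf.TFRecordReader(options=options)

    # read up to max_reads serialized examples at once
    _, serialized = reader.read_up_to(filename_queue, num_records=max_reads)
    features = tf.parse_example(serialized, features={
        'image': tf.FixedLenFeature([], tf.string),
        'type': tf.FixedLenFeature([], tf.int64)
    })

    # decode the raw bytes and scale to [0, 1]
    images = tf.decode_raw(features['image'], tf.uint8)
    images = tf.cast(images, tf.float32) / 255.
    images = tf.reshape(images, (-1, 180, 320, 3))

    # shuffle examples across reads; enqueue_many because the tensors
    # already carry a batch dimension
    return tf.train.shuffle_batch([images, features['type']],
                                  batch_size=batch_size,
                                  capacity=10 * batch_size,
                                  min_after_dequeue=2 * batch_size,
                                  enqueue_many=True)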
# Preview script: estimate per-class mean embeddings, then shift each image's
# embedding away from its own class mean and towards the Afremov mean.
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)
        import_graph('exported/vae-refine.pb',
                     input_map={'image_batch': image_batch},
                     prefix='process')

        phase_train = graph.get_tensor_by_name(
            'process/mogrify/vae/phase_train:0')
        embedding = graph.get_tensor_by_name(
            'process/mogrify/vae/variational/add:0')

        reconstructed = graph.get_tensor_by_name('process/mogrify/clip:0')
        reconstructed.set_shape((None, 180, 320, 3))

        refined = graph.get_tensor_by_name('process/refine/y:0')
        refined.set_shape((None, 180, 320, 3))

        coord = tf.train.Coordinator()
        with tf.Session(graph=graph) as sess:
            init = tf.group(tf.local_variables_initializer(),
                            tf.global_variables_initializer())
            sess.run(init)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                print('Determining mean representations ...')
                coeff_means = {}
                counts = {}
                while not coord.should_stop():
                    types, coeffs = sess.run([type_batch, embedding],
                                             feed_dict={phase_train: False})
                    for t, c in zip(types, coeffs):
                        if t not in coeff_means:
                            coeff_means[t] = np.zeros(c.shape)
                            counts[t] = 0
                        coeff_means[t] += c
                        counts[t] += 1

                    # stop accumulating once all three classes have
                    # enough samples
                    min_count = np.min(list(counts.values()))
                    if len(counts) >= 3 and min_count > 400:
                        for k in coeff_means.keys():
                            coeff_means[k] /= counts[k]
                        break

                # prior knowledge about the class indices
                video_wim_hurry = 0
                paintings_afremov = 1
                video_disclosure_magnets = 2

                print('Evaluating ...')
                while not coord.should_stop():
                    # obtain embeddings and type identifiers
                    types, reference, coeffs = sess.run(
                        [type_batch, image_batch, embedding],
                        feed_dict={phase_train: False})

                    # for each coefficient, remove their original mean,
                    # then add back a bit of Leonid Afremov
                    alpha = 0.25
                    beta = 1.0
                    for i in range(coeffs.shape[0]):
                        coeffs[i] -= alpha * coeff_means[types[i]]
                        coeffs[i] += beta * coeff_means[paintings_afremov]

                        # simply reversing the coefficients is interesting as well
                        # coeffs[i] = list(reversed(coeffs[i]))

                    # ... then fetching the images given the embedding.
                    results = sess.run(refined, feed_dict={phase_train: False,
                                                           embedding: coeffs})

                    assert reference.shape == results.shape
                    reference = reference[:3]
                    results = results[:3]

                    canvas = example_gallery(reference, results)
                    cv2.imshow(window, canvas)
                    if (cv2.waitKey(1000) & 0xff) == 27:
                        print('User requested cancellation.')
                        coord.request_stop()
                        break
            except tf.errors.OutOfRangeError:
                print('Read all examples.')
            finally:
                coord.request_stop()
                coord.join(threads)
                coord.wait_for_stop()

    cv2.destroyWindow(window)
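# import_graph is also a project helper. Assuming the exported .pb files are
# frozen GraphDefs, it is presumably a thin wrapper around
# tf.import_graph_def, roughly like this:
def import_graph(pb_path, input_map=None, prefix=''):
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # map the frozen graph's input tensor onto the live input pipeline;
    # the prefix becomes the name scope of the imported ops
    tf.import_graph_def(graph_def, input_map=input_map, name=prefix)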
# Video script: stylize whole video files by shifting each frame's embedding
# towards the Afremov mean, with temporal smoothing and a picture-in-picture
# overlay of the original.
def main(inputs):
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)
        import_graph('exported/vae-refine.pb',
                     input_map={'image_batch': image_batch},
                     prefix='process')

        phase_train = graph.get_tensor_by_name(
            'process/mogrify/vae/phase_train:0')
        embedding = graph.get_tensor_by_name(
            'process/mogrify/vae/variational/add:0')

        reconstructed = graph.get_tensor_by_name('process/mogrify/clip:0')
        reconstructed.set_shape((None, 180, 320, 3))

        refined = graph.get_tensor_by_name('process/refine/y:0')
        refined.set_shape((None, 180, 320, 3))

        coord = tf.train.Coordinator()
        with tf.Session(graph=graph) as sess:
            init = tf.group(tf.local_variables_initializer(),
                            tf.global_variables_initializer())
            sess.run(init)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                print('Determining mean representations ...')
                coeff_means = {}
                counts = {}
                while not coord.should_stop():
                    types, coeffs = sess.run([type_batch, embedding],
                                             feed_dict={phase_train: False})
                    for t, c in zip(types, coeffs):
                        if t not in coeff_means:
                            coeff_means[t] = np.zeros(c.shape)
                            counts[t] = 0
                        coeff_means[t] += c
                        counts[t] += 1

                    # stop accumulating once all three classes have
                    # enough samples
                    min_count = np.min(list(counts.values()))
                    if len(counts) >= 3 and min_count > 400:
                        for k in coeff_means.keys():
                            coeff_means[k] /= counts[k]
                        break
            except tf.errors.OutOfRangeError:
                print('Read all examples.')
            finally:
                # the queues are only needed for the mean estimation;
                # video frames are fed in manually below
                coord.request_stop()
                coord.join(threads)
                coord.wait_for_stop()

            # prior knowledge about the class indices
            video_wim_hurry = 0
            paintings_afremov = 1
            video_disclosure_magnets = 2

            for video in inputs:
                name = path.basename(video['file'])
                print('Evaluating {}...'.format(name))

                crop = video['crop']
                target_width, target_height = 320, 180

                cap = cv2.VideoCapture(video['file'])
                height, width = cap.get(cv2.CAP_PROP_FRAME_HEIGHT), \
                    cap.get(cv2.CAP_PROP_FRAME_WIDTH)
                fps = cap.get(cv2.CAP_PROP_FPS)
                n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                if video['length'] is not None:
                    n_frames = int(video['length'] * fps)

                # output size after cropping
                width -= crop[1] + crop[3]
                height -= crop[0] + crop[2]

                writer = cv2.VideoWriter(
                    'out-' + name,
                    fourcc=cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
                    fps=fps,
                    frameSize=(int(width), int(height)))
                try:
                    last_frame = None
                    for _ in tqdm(range(n_frames + 1)):
                        success, frame = cap.read()
                        if not success:
                            break

                        # prepare for the network; the crop margins must be
                        # positive for this slicing to work
                        buffer = frame[crop[0]:-crop[2], crop[1]:-crop[3], :] / 255.
                        buffer = cv2.resize(buffer,
                                            (target_width, target_height),
                                            interpolation=cv2.INTER_AREA)

                        # obtain the embeddings for the video frame
                        buffer_ = np.expand_dims(buffer, axis=0)
                        coeffs = sess.run(embedding, feed_dict={
                            image_batch: buffer_,
                            phase_train: False})

                        # adjust the embeddings
                        alpha = 0.25
                        beta = 0.5
                        coeffs -= alpha * coeff_means[video['type']]
                        coeffs += beta * coeff_means[paintings_afremov]

                        # fetching the processed image
                        results = sess.run(refined, feed_dict={
                            phase_train: False,
                            embedding: coeffs})
                        assert results.shape[0] == 1

                        # apply moving average for _some_ temporal smoothing
                        if last_frame is None:
                            last_frame = results
                        else:
                            last_frame = 0.2 * last_frame + 0.8 * results

                        # prepare the output frame
                        size = (int(width), int(height))
                        video_frame = np.squeeze(last_frame[0])
                        video_frame = cv2.resize(
                            video_frame, size,
                            interpolation=cv2.INTER_LANCZOS4)

                        # clipping some blacks
                        video_frame = (video_frame * 1.05) - 0.05

                        # superimpose the original
                        sw = target_width
                        sh = int(target_width * height / width)
                        buffer = cv2.resize(buffer, (sw, sh),
                                            interpolation=cv2.INTER_LANCZOS4)
                        video_frame[10:sh + 10, 10:sw + 10] = buffer

                        video_frame = np.clip(video_frame * 255., 0., 255.) \
                            .astype(np.uint8)
                        writer.write(video_frame)

                        # cv2.imshow(window, video_frame)
                        # if (cv2.waitKey(1) & 0xff) == 27:
                        #     print('User requested cancellation.')
                        #     coord.request_stop()
                        #     break
                finally:
                    writer.release()
                    cap.release()

    cv2.destroyWindow(window)
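# A hypothetical invocation of the video script; the file name, crop margins,
# clip length and type index are made up for illustration. Since the frame is
# cropped as frame[crop[0]:-crop[2], crop[1]:-crop[3]], all four margins need
# to be positive.
if __name__ == '__main__':
    main([{
        'file': 'videos/example.mp4',
        'crop': (60, 10, 60, 10),  # top, left, bottom, right margins in pixels
        'length': 30,              # seconds to process, or None for the full clip
        'type': 0                  # class index (see the "prior knowledge" constants)
    }])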
# Preview script: perturb ("salt") the embeddings with Gaussian noise and
# display the refined reconstructions next to the originals.
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)
        import_graph('exported/vae-refine.pb',
                     input_map={'image_batch': image_batch},
                     prefix='process')

        phase_train = graph.get_tensor_by_name(
            'process/mogrify/vae/phase_train:0')
        embedding = graph.get_tensor_by_name(
            'process/mogrify/vae/variational/add:0')

        reconstructed = graph.get_tensor_by_name('process/mogrify/clip:0')
        reconstructed.set_shape((None, 180, 320, 3))

        refined = graph.get_tensor_by_name('process/refine/y:0')
        refined.set_shape((None, 180, 320, 3))

        coord = tf.train.Coordinator()
        with tf.Session(graph=graph) as sess:
            init = tf.group(tf.local_variables_initializer(),
                            tf.global_variables_initializer())
            sess.run(init)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                print('Evaluating ...')
                while not coord.should_stop():
                    # fetching the embeddings given the inputs ...
                    reference, coeffs = sess.run([image_batch, embedding],
                                                 feed_dict={phase_train: False})

                    # ... then salting the embeddings ...
                    coeffs += np.random.randn(coeffs.shape[0], coeffs.shape[1])

                    # ... then fetching the images given the new embeddings.
                    results = sess.run(refined, feed_dict={phase_train: False,
                                                           embedding: coeffs})

                    assert reference.shape == results.shape
                    reference = reference[:3]
                    results = results[:3]

                    canvas = example_gallery(reference, results)
                    cv2.imshow(window, canvas)
                    if (cv2.waitKey(1000) & 0xff) == 27:
                        print('User requested cancellation.')
                        coord.request_stop()
                        break
            except tf.errors.OutOfRangeError:
                print('Read all examples.')
            finally:
                coord.request_stop()
                coord.join(threads)
                coord.wait_for_stop()

    cv2.destroyWindow(window)
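# example_gallery is not defined in this listing either. Given how it is used
# (two equally-shaped image batches in, one displayable canvas out), a minimal
# stand-in could place each reference/result pair side by side:
def example_gallery(references, results):
    # one row per pair: reference on the left, result on the right
    rows = [np.hstack((ref, res)) for ref, res in zip(references, results)]
    return np.vstack(rows)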
# Training script for the refinement (upsampling) network: the VAE weights
# stay fixed and only the refine/ variables are optimized.
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    n_epochs = 50
    lr_pretrain = 1e-3
    lr_train = 1e-4
    img_step = 25

    # timestamp = datetime.today().strftime('%Y%m%d-%H%M%S')
    # log_path = path.join('log.upsample', timestamp)
    log_path = path.join('log.upsample', '20170207-021128-2')

    with tf.Graph().as_default() as graph:
        global_step = tf.Variable(initial_value=0, trainable=False,
                                  name='global_step', dtype=tf.int64)
        learning_rate = tf.placeholder(tf.float32, shape=(),
                                       name='learning_rate')

        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)

        # forcing the inputs to have a known name
        image_batch = tf.identity(image_batch, name='image_batch')

        # todo: use augmentation; requires tf.map_fn() to work across batches
        # image_batch = tf.image.random_flip_left_right(image_batch)
        # image_batch = tf.image.random_flip_up_down(image_batch)

        import_graph('exported/vae.pb',
                     input_map={'vae/x': image_batch},
                     prefix='mogrify')

        phase_train = graph.get_tensor_by_name('mogrify/vae/phase_train:0')
        reconstructed = graph.get_tensor_by_name('mogrify/vae/decoder/6/Elu:0')
        reconstructed.set_shape((None, 180, 320, 3))

        # perform simple clipping
        reconstructed = tf.nn.relu(reconstructed, name='mogrify/clip')

        refined = generator(reconstructed, name='refine')

        with tf.variable_scope('training'):
            # programmer is using paranoia. it's super effective.
            # ensure that only the upsampling graph is trained
            train_vars = [v for v in tf.trainable_variables()
                          if v.name.startswith('refine')]

            # the image should be similar to the original ...
            loss_1 = tf.reduce_sum(tf.square(refined - image_batch), axis=3)
            loss_1 = tf.reduce_sum(loss_1, axis=2)
            loss_1 = tf.reduce_sum(loss_1, axis=1)
            loss_1 = tf.reduce_mean(loss_1)

            # ... but dissimilar to the VAE reconstruction ...
            loss_2 = tf.reduce_sum(tf.square(refined - reconstructed), axis=3)
            loss_2 = tf.reduce_sum(loss_2, axis=2)
            loss_2 = tf.reduce_sum(loss_2, axis=1)
            loss_2 = 1.e4 / tf.reduce_mean(loss_2)

            # ... and we do both at the same time.
            loss = 0.6 * loss_1 + 0.4 * loss_2

            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) \
                .minimize(loss, global_step, var_list=train_vars)

        tf.summary.scalar('loss', loss_1, collections=['test'])
        tf.summary.scalar('learning_rate', learning_rate, collections=['test'])
        tf.summary.image('input', image_batch, collections=['test'])
        tf.summary.image('vae', reconstructed, collections=['test'])
        tf.summary.image('ref', refined, collections=['test'])
        test_summaries = tf.summary.merge_all('test')

        sv = tf.train.Supervisor(graph=graph, logdir=log_path)
        with sv.managed_session() as sess:
            try:
                print('Loading test data ...')
                test_imgs = ('test_1.jpg', 'test_2.jpg', 'test_3.jpg')
                paths = (path.join('test', img_name) for img_name in test_imgs)
                test_images = [cv2.imread(img_path) for img_path in paths]
                test_Xs = np.array(test_images, np.float32) / 255.

                epoch_i = 0
                loss_value = 1e4  # TODO: needs to be turned off when continuing learning
                batch_i = 0
                while not sv.should_stop() and epoch_i < 1000 \
                        and loss_value >= 400:
                    # run pre-training on a small subset of the images
                    # (here: the visual test images); feeding loss_2: 0
                    # disables the dissimilarity term during this phase
                    if (batch_i - 1) % img_step == 0:
                        batch_i, loss_value, s, _ = sess.run(
                            [global_step, loss, test_summaries, optimizer],
                            feed_dict={learning_rate: lr_pretrain,
                                       loss_2: 0,
                                       image_batch: test_Xs,
                                       reconstructed: test_Xs})
                        sv.summary_computed(sess, s, batch_i)
                    else:
                        batch_i, loss_value, _ = sess.run(
                            [global_step, loss, optimizer],
                            feed_dict={learning_rate: lr_pretrain,
                                       loss_2: 0,
                                       image_batch: test_Xs,
                                       reconstructed: test_Xs})
                    print('pretrain', batch_i, loss_value)
                    epoch_i += 1

                    if (batch_i - 1) % img_step == 0:
                        # visually evaluate the outcome
                        resized_inputs, results = sess.run(
                            [image_batch, refined],
                            feed_dict={image_batch: test_Xs,
                                       reconstructed: test_Xs})
                        assert resized_inputs.shape == results.shape

                        canvas = example_gallery(resized_inputs, results)
                        cv2.imshow(window, canvas)

                    # display responsiveness
                    if (cv2.waitKey(1) & 0xff) == 27:
                        print('User requested cancellation.')
                        sv.request_stop()
                        break

                # note that epoch_i is never advanced in this loop; training
                # ends when the input queue raises OutOfRangeError or the
                # user cancels
                epoch_i = 0
                while not sv.should_stop() and epoch_i < n_epochs:
                    # run one optimization step
                    if (batch_i - 1) % img_step == 0:
                        batch_i, loss_value, s, _ = sess.run(
                            [global_step, loss, test_summaries, optimizer],
                            feed_dict={learning_rate: lr_train,
                                       phase_train: False})
                        sv.summary_computed(sess, s, batch_i)
                    else:
                        batch_i, loss_value, _ = sess.run(
                            [global_step, loss, optimizer],
                            feed_dict={learning_rate: lr_train,
                                       phase_train: False})
                    print('train', batch_i, loss_value)

                    if (batch_i - 1) % img_step == 0:
                        resized_inputs, results = sess.run(
                            [reconstructed,  # reference,
                             refined],
                            feed_dict={phase_train: False})
                        assert resized_inputs.shape == results.shape

                        resized_inputs = resized_inputs[:3]
                        results = results[:3]

                        canvas = example_gallery(resized_inputs, results)
                        cv2.imshow(window, canvas)

                    # display responsiveness
                    if (cv2.waitKey(1) & 0xff) == 27:
                        print('User requested cancellation.')
                        sv.request_stop()
                        break
            except tf.errors.OutOfRangeError:
                print('Read all examples.')
            finally:
                sv.request_stop()
                sv.wait_for_stop()

    cv2.destroyWindow(window)
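# The refinement network generator() is defined elsewhere in the project.
# A minimal stand-in consistent with the names used above (trainable
# variables under 'refine/', output tensor 'refine/y:0', same spatial shape
# as the input) might look like this; the layer count and widths are guesses:
def generator(x, name='refine'):
    with tf.variable_scope(name):
        h = tf.layers.conv2d(x, 32, 3, padding='same', activation=tf.nn.elu)
        h = tf.layers.conv2d(h, 32, 3, padding='same', activation=tf.nn.elu)
        h = tf.layers.conv2d(h, 3, 3, padding='same')
        # residual correction on top of the VAE reconstruction,
        # clipped to the valid pixel range
        return tf.clip_by_value(x + h, 0., 1., name='y')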