Example 1
import cv2
import numpy as np
import tensorflow as tf
from glob import glob
from os import path
from tqdm import tqdm

# project-local helpers (import_images, import_graph, example_gallery,
# generator) are assumed to come from the surrounding module; the same
# imports apply to the later examples as well

def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*.tfrecord.gz'))
    max_reads = 50
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names, max_reads=max_reads, batch_size=batch_size)

    coord = tf.train.Coordinator()
    with tf.Session(graph=graph) as sess:
        init = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init)

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                Xs = sess.run(image_batch)
                for img in Xs:
                    cv2.imshow(window, img)
                    if (cv2.waitKey(33) & 0xff) == 27:  # 27 == ESC
                        coord.request_stop()
                        break

        except tf.errors.OutOfRangeError:
            print('Read all examples.')
        finally:
            coord.request_stop()
            coord.join(threads)

        cv2.destroyWindow(window)
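Every example in this listing leans on a project-local import_images helper that is not part of the listing itself. A minimal sketch of what it might look like, assuming gzip-compressed TFRecords that store a JPEG-encoded 320x180 frame under 'image' and a class id under 'type' (the field names and the JPEG encoding are assumptions):

def import_images(file_names, max_reads=50, batch_size=50):
    queue = tf.train.string_input_producer(file_names, num_epochs=1)
    reader = tf.TFRecordReader(
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.GZIP))
    _, records = reader.read_up_to(queue, max_reads)

    features = tf.parse_example(records, features={
        'image': tf.FixedLenFeature([], tf.string),
        'type': tf.FixedLenFeature([], tf.int64)
    })

    # decode to float32 in [0, 1]; note that decode_jpeg yields RGB
    # while OpenCV expects BGR, so the frames may well be stored as BGR
    images = tf.map_fn(
        lambda raw: tf.image.convert_image_dtype(
            tf.image.decode_jpeg(raw, channels=3), tf.float32),
        features['image'], dtype=tf.float32)
    images.set_shape((None, 180, 320, 3))

    return tf.train.batch([images, features['type']],
                          batch_size=batch_size,
                          enqueue_many=True,
                          allow_smaller_final_batch=True)

The num_epochs=1 argument is what makes the pipeline raise tf.errors.OutOfRangeError once every record has been read, which the examples catch to terminate cleanly; it is also why they run tf.local_variables_initializer().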
Example 2
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)

        import_graph('exported/vae-refine.pb',
                     input_map={'image_batch': image_batch},
                     prefix='process')
        phase_train = graph.get_tensor_by_name(
            'process/mogrify/vae/phase_train:0')

        embedding = graph.get_tensor_by_name(
            'process/mogrify/vae/variational/add:0')

        reconstructed = graph.get_tensor_by_name('process/mogrify/clip:0')
        reconstructed.set_shape((None, 180, 320, 3))

        refined = graph.get_tensor_by_name('process/refine/y:0')
        refined.set_shape((None, 180, 320, 3))

    coord = tf.train.Coordinator()
    with tf.Session(graph=graph) as sess:
        init = tf.group(tf.local_variables_initializer(),
                        tf.global_variables_initializer())
        sess.run(init)

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            print('Determining mean representations ...')
            coeff_means = {}
            counts = {}
            while not coord.should_stop():
                types, coeffs = sess.run([type_batch, embedding],
                                         feed_dict={phase_train: False})

                for t, c in zip(types, coeffs):
                    if t not in coeff_means:
                        coeff_means[t] = np.zeros(c.shape)
                        counts[t] = 0
                    coeff_means[t] += c
                    counts[t] += 1

                # once all three classes have more than 400 samples each,
                # turn the accumulated sums into means and stop
                min_count = np.min(list(counts.values()))
                if len(counts) >= 3 and min_count > 400:
                    for k in coeff_means.keys():
                        coeff_means[k] /= counts[k]
                    break

            # prior knowledge: the class ids of the three source data sets
            video_wim_hurry = 0
            paintings_afremov = 1
            video_disclosure_magnets = 2

            print('Evaluating ...')
            while not coord.should_stop():
                # obtain embeddings and type identifiers
                types, reference, coeffs = sess.run(
                    [type_batch, image_batch, embedding],
                    feed_dict={phase_train: False})

                # for each coefficient, remove their original mean,
                # then add back a bit of Leonid Afremov
                alpha = 0.25
                beta = 1.0
                for i in range(coeffs.shape[0]):
                    coeffs[i] -= alpha * coeff_means[types[i]]
                    coeffs[i] += beta * coeff_means[paintings_afremov]

                    # simply reversing the coefficients is interesting as well
                    # coeffs[i] = list(reversed(coeffs[i]))

                # ... then fetching the images given the embedding.
                results = sess.run(refined,
                                   feed_dict={
                                       phase_train: False,
                                       embedding: coeffs
                                   })

                assert reference.shape == results.shape
                reference = reference[:3]
                results = results[:3]

                canvas = example_gallery(reference, results)
                cv2.imshow(window, canvas)

                if (cv2.waitKey(1000) & 0xff) == 27:
                    print('User requested cancellation.')
                    coord.request_stop()
                    break

        except tf.errors.OutOfRangeError:
            print('Read all examples.')
        finally:
            coord.request_stop()
            coord.join(threads)

        cv2.destroyWindow(window)
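import_graph is project-local as well. Given how it is called (a frozen .pb file, an input_map and a name prefix), it is presumably a thin wrapper around tf.import_graph_def; a sketch under that assumption:

def import_graph(file_name, input_map=None, prefix=''):
    # load a frozen GraphDef and splice it into the current
    # default graph under the given name prefix
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(file_name, 'rb') as f:
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, input_map=input_map, name=prefix)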
Example 3
def main(inputs):
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)

        import_graph('exported/vae-refine.pb',
                     input_map={'image_batch': image_batch},
                     prefix='process')
        phase_train = graph.get_tensor_by_name(
            'process/mogrify/vae/phase_train:0')

        embedding = graph.get_tensor_by_name(
            'process/mogrify/vae/variational/add:0')

        reconstructed = graph.get_tensor_by_name('process/mogrify/clip:0')
        reconstructed.set_shape((None, 180, 320, 3))

        refined = graph.get_tensor_by_name('process/refine/y:0')
        refined.set_shape((None, 180, 320, 3))

    coord = tf.train.Coordinator()
    with tf.Session(graph=graph) as sess:
        init = tf.group(tf.local_variables_initializer(),
                        tf.global_variables_initializer())
        sess.run(init)

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            print('Determining mean representations ...')
            coeff_means = {}
            counts = {}
            while not coord.should_stop():
                types, coeffs = sess.run([type_batch, embedding],
                                         feed_dict={phase_train: False})

                for t, c in zip(types, coeffs):
                    if t not in coeff_means:
                        coeff_means[t] = np.zeros(c.shape)
                        counts[t] = 0
                    coeff_means[t] += c
                    counts[t] += 1

                min_count = np.min(list(counts.values()))
                if len(counts) >= 3 and min_count > 400:
                    for k in coeff_means.keys():
                        coeff_means[k] /= counts[k]
                    break

        except tf.errors.OutOfRangeError:
            print('Read all examples.')
        finally:
            coord.request_stop()
            coord.join(threads)

        # prior knowledge: the class ids of the three source data sets
        video_wim_hurry = 0
        paintings_afremov = 1
        video_disclosure_magnets = 2

        for video in inputs:
            name = path.basename(video['file'])
            print('Evaluating {}...'.format(name))

            crop = video['crop']
            target_width, target_height = 320, 180

            cap = cv2.VideoCapture(video['file'])
            height, width = cap.get(cv2.CAP_PROP_FRAME_HEIGHT), \
                            cap.get(cv2.CAP_PROP_FRAME_WIDTH)
            fps = cap.get(cv2.CAP_PROP_FPS)
            n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if video['length'] is not None:
                n_frames = int(video['length'] * fps)

            width -= crop[1] + crop[3]
            height -= crop[0] + crop[2]

            writer = cv2.VideoWriter('out-' + name,
                                     fourcc=cv2.VideoWriter_fourcc(
                                         'm', 'p', '4', 'v'),
                                     fps=fps,
                                     frameSize=(int(width), int(height)))

            try:
                last_frame = None
                for _ in tqdm(range(n_frames + 1)):
                    success, frame = cap.read()
                    if not success:
                        break

                    # prepare the frame for the network; note that the
                    # negative-index crop assumes all four margins are > 0
                    buffer = frame[crop[0]:-crop[2],
                                   crop[1]:-crop[3], :] / 255.
                    buffer = cv2.resize(buffer, (target_width, target_height),
                                        interpolation=cv2.INTER_AREA)

                    # obtain the embeddings for the video frame
                    buffer_ = np.expand_dims(buffer, axis=0)
                    coeffs = sess.run(embedding,
                                      feed_dict={
                                          image_batch: buffer_,
                                          phase_train: False
                                      })

                    # adjust the embeddings
                    alpha = 0.25
                    beta = 0.5

                    coeffs -= alpha * coeff_means[video['type']]
                    coeffs += beta * coeff_means[paintings_afremov]

                    # fetching the processed image
                    results = sess.run(refined,
                                       feed_dict={
                                           phase_train: False,
                                           embedding: coeffs
                                       })
                    assert results.shape[0] == 1

                    # apply moving average for _some_ temporal smoothing
                    if last_frame is None:
                        last_frame = results
                    else:
                        last_frame = 0.2 * last_frame + 0.8 * results

                    # prepare the output frame
                    size = (int(width), int(height))
                    video_frame = last_frame[0]  # drop the batch dimension
                    video_frame = cv2.resize(video_frame,
                                             size,
                                             interpolation=cv2.INTER_LANCZOS4)

                    # clipping some blacks
                    video_frame = (video_frame * 1.05) - 0.05

                    # superimpose the original
                    sw = target_width
                    sh = int(target_width * height / width)
                    buffer = cv2.resize(buffer, (sw, sh),
                                        interpolation=cv2.INTER_LANCZOS4)
                    video_frame[10:sh + 10, 10:sw + 10] = buffer

                    video_frame = np.clip(video_frame * 255., 0.,
                                          255.).astype(np.uint8)
                    writer.write(video_frame)

                    # cv2.imshow(window, video_frame)
                    # if (cv2.waitKey(1) & 0xff) == 27:
                    #    print('User requested cancellation.')
                    #    coord.request_stop()
                    #    break

            finally:
                writer.release()
                cap.release()

        cv2.destroyWindow(window)
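One caveat in the frame preparation above: Python's negative indexing means a crop margin of 0 would slice the frame down to an empty array. A safer standalone variant of that step (prepare_frame is a hypothetical name):

def prepare_frame(frame, crop, size=(320, 180)):
    # crop = (top, left, bottom, right); compute explicit bounds so
    # that a margin of 0 does not turn into a negative index
    top, left, bottom, right = crop
    h, w = frame.shape[:2]
    buffer = frame[top:h - bottom, left:w - right, :] / 255.
    return cv2.resize(buffer, size, interpolation=cv2.INTER_AREA)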
Example 4
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    with tf.Graph().as_default() as graph:
        image_batch, type_batch = import_images(tfrecord_file_names, max_reads=max_reads, batch_size=batch_size)

        import_graph('exported/vae-refine.pb', input_map={'image_batch': image_batch}, prefix='process')
        phase_train = graph.get_tensor_by_name('process/mogrify/vae/phase_train:0')

        embedding = graph.get_tensor_by_name('process/mogrify/vae/variational/add:0')

        reconstructed = graph.get_tensor_by_name('process/mogrify/clip:0')
        reconstructed.set_shape((None, 180, 320, 3))

        refined = graph.get_tensor_by_name('process/refine/y:0')
        refined.set_shape((None, 180, 320, 3))

    coord = tf.train.Coordinator()
    with tf.Session(graph=graph) as sess:
        init = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init)

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            print('Evaluating ...')
            while not coord.should_stop():
                # fetching the embeddings given the inputs ...
                reference, coeffs = sess.run([image_batch, embedding], feed_dict={phase_train: False})

                # ... then salting the embeddings ...
                coeffs += np.random.randn(*coeffs.shape)

                # ... then fetching the images given the new embeddings.
                results = sess.run(refined, feed_dict={phase_train: False, embedding: coeffs})

                assert reference.shape == results.shape
                reference = reference[:3]
                results = results[:3]

                canvas = example_gallery(reference, results)
                cv2.imshow(window, canvas)

                if (cv2.waitKey(1000) & 0xff) == 27:
                    print('User requested cancellation.')
                    coord.request_stop()
                    break

        except tf.errors.OutOfRangeError:
            print('Read all examples.')
        finally:
            coord.request_stop()
            coord.join(threads)

        cv2.destroyWindow(window)
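example_gallery is another helper that is not shown. Judging from its use, it merely tiles the reference batch and the result batch into a single canvas for cv2.imshow; a plausible minimal version:

def example_gallery(references, results):
    # one row of reference images stacked over one row of results;
    # both arguments are float32 arrays of shape (n, 180, 320, 3)
    top = np.hstack(list(references))
    bottom = np.hstack(list(results))
    return np.vstack((top, bottom))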
Example 5
def main():
    window = 'preview'
    cv2.namedWindow(window)

    tfrecord_file_names = glob(path.join('data', '*-2.tfrecord.gz'))
    max_reads = 200
    batch_size = 50

    n_epochs = 50
    lr_pretrain = 1e-3
    lr_train = 1e-4
    img_step = 25

    # timestamp = datetime.today().strftime('%Y%m%d-%H%M%S')
    # log_path = path.join('log.upsample', timestamp)
    log_path = path.join('log.upsample', '20170207-021128-2')

    with tf.Graph().as_default() as graph:
        global_step = tf.Variable(initial_value=0,
                                  trainable=False,
                                  name='global_step',
                                  dtype=tf.int64)
        learning_rate = tf.placeholder(tf.float32,
                                       shape=(),
                                       name='learning_rate')

        image_batch, type_batch = import_images(tfrecord_file_names,
                                                max_reads=max_reads,
                                                batch_size=batch_size)

        # forcing the inputs to have a known name
        image_batch = tf.identity(image_batch, name='image_batch')

        # todo: use augmentation; requires tf.map_fn() to work across batches
        # image_batch = tf.image.random_flip_left_right(image_batch)
        # image_batch = tf.image.random_flip_up_down(image_batch)

        import_graph('exported/vae.pb',
                     input_map={'vae/x': image_batch},
                     prefix='mogrify')
        phase_train = graph.get_tensor_by_name('mogrify/vae/phase_train:0')
        reconstructed = graph.get_tensor_by_name('mogrify/vae/decoder/6/Elu:0')
        reconstructed.set_shape((None, 180, 320, 3))

        # perform simple clipping
        reconstructed = tf.nn.relu(reconstructed, name='mogrify/clip')

        refined = generator(reconstructed, name='refine')

        with tf.variable_scope('training'):
            # programmer is using paranoia. it's super effective.
            # ensure that only the upsampling graph is trained
            train_vars = [
                v for v in tf.trainable_variables()
                if v.name.startswith('refine')
            ]

            # image should be similar to the original ...
            loss_1 = tf.reduce_mean(
                tf.reduce_sum(tf.square(refined - image_batch),
                              axis=[1, 2, 3]))

            # ... but dissimilar to the VAE reconstruction ...
            loss_2 = tf.reduce_sum(tf.square(refined - reconstructed),
                                   axis=[1, 2, 3])
            loss_2 = 1.e4 / tf.reduce_mean(loss_2)

            # ... and we do both at the same time.
            loss = 0.6 * loss_1 + 0.4 * loss_2

            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\
                .minimize(loss, global_step, var_list=train_vars)

        tf.summary.scalar('loss', loss_1, collections=['test'])
        tf.summary.scalar('learning_rate', learning_rate, collections=['test'])
        tf.summary.image('input', image_batch, collections=['test'])
        tf.summary.image('vae', reconstructed, collections=['test'])
        tf.summary.image('ref', refined, collections=['test'])
        test_summaries = tf.summary.merge_all('test')

    sv = tf.train.Supervisor(graph=graph, logdir=log_path)
    with sv.managed_session() as sess:
        try:
            print('Loading test data ...')

            test_imgs = ('test_1.jpg', 'test_2.jpg', 'test_3.jpg')
            paths = (path.join('test', img_name) for img_name in test_imgs)
            test_images = [cv2.imread(img_path) for img_path in paths]
            test_Xs = np.array(test_images, np.float32) / 255.

            epoch_i = 0
            loss_value = 1e4

            # TODO: the pre-training loop below must be skipped when
            # resuming training from a checkpoint
            batch_i = 0
            while (not sv.should_stop() and epoch_i < 1000
                   and loss_value >= 400):
                # run pre-training on a small subset of the images
                # (here: the visual test images)
                if (batch_i - 1) % img_step == 0:
                    batch_i, loss_value, s, _ = sess.run(
                        [global_step, loss, test_summaries, optimizer],
                        feed_dict={
                            learning_rate: lr_pretrain,
                            loss_2: 0,
                            image_batch: test_Xs,
                            reconstructed: test_Xs
                        })
                    sv.summary_computed(sess, s, batch_i)
                else:
                    batch_i, loss_value, _ = sess.run(
                        [global_step, loss, optimizer],
                        feed_dict={
                            learning_rate: lr_pretrain,
                            loss_2: 0,
                            image_batch: test_Xs,
                            reconstructed: test_Xs
                        })
                print('pretrain', batch_i, loss_value)
                epoch_i += 1

                if (batch_i - 1) % img_step == 0:
                    # visually evaluate the outcome
                    resized_inputs, results = sess.run(
                        [image_batch, refined],
                        feed_dict={
                            image_batch: test_Xs,
                            reconstructed: test_Xs
                        })
                    assert resized_inputs.shape == results.shape

                    canvas = example_gallery(resized_inputs, results)
                    cv2.imshow(window, canvas)

                # display responsiveness
                if (cv2.waitKey(1) & 0xff) == 27:
                    print('User requested cancellation.')
                    sv.request_stop()
                    break

            epoch_i = 0
            while not sv.should_stop() and epoch_i < n_epochs:
                # run one optimization step
                if (batch_i - 1) % img_step == 0:
                    batch_i, loss_value, s, _ = sess.run(
                        [global_step, loss, test_summaries, optimizer],
                        feed_dict={
                            learning_rate: lr_train,
                            phase_train: False
                        })
                    sv.summary_computed(sess, s, batch_i)
                else:
                    batch_i, loss_value, _ = sess.run(
                        [global_step, loss, optimizer],
                        feed_dict={
                            learning_rate: lr_train,
                            phase_train: False
                        })
                print('train', batch_i, loss_value)

                if (batch_i - 1) % img_step == 0:
                    # the VAE reconstruction serves as the reference here
                    resized_inputs, results = sess.run(
                        [reconstructed, refined],
                        feed_dict={phase_train: False})
                    assert resized_inputs.shape == results.shape

                    resized_inputs = resized_inputs[:3]
                    results = results[:3]

                    canvas = example_gallery(resized_inputs, results)
                    cv2.imshow(window, canvas)

                # display responsiveness
                if (cv2.waitKey(1) & 0xff) == 27:
                    print('User requested cancellation.')
                    sv.request_stop()
                    break

        except tf.errors.OutOfRangeError:
            print('Read all examples.')
        finally:
            sv.request_stop()
            sv.wait_for_stop()

        cv2.destroyWindow(window)
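The frozen models that import_graph reads back in ('exported/vae.pb', 'exported/vae-refine.pb') have to be produced somewhere; the export step is not part of the listing. A sketch of how this is typically done in TF1, assuming the training graph has been rebuilt in the default graph and a checkpoint exists (the function name and the node list below are illustrative):

def export_frozen_graph(checkpoint_dir, output_file, output_nodes):
    # restore the latest checkpoint, bake all variables into
    # constants and serialize the pruned GraphDef to disk
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
        graph_def = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_nodes)
    with tf.gfile.GFile(output_file, 'wb') as f:
        f.write(graph_def.SerializeToString())

# e.g. export_frozen_graph(log_path, 'exported/vae-refine.pb',
#                          ['refine/y'])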