コード例 #1
0
def do_blur(left_image_list, rigth_image_list, focus_list,
            stereonet_checkpoint, output_folder, intermediate, verbose,
            aperture, pyramidal_conv, disparity_range, from_stage, resume):
    """Refocuses a sequence of stereo frames, one focus plane per frame.

    Parameters
    ----------
    left_image_list, rigth_image_list : str
        Paths to text files listing the left/right image paths, one per line.
    focus_list : str
        Path to a text file listing one focus plane (disparity) per frame.
    stereonet_checkpoint : str
        Checkpoint to restore the stereonet weights from.
    output_folder : str
        Folder where refocused images (and intermediates) are written.
    intermediate : bool
        Also save the intermediate result (disparity map or cost volume).
    verbose : int
        0 = errors only, 1 = warnings, 2 = info.
    aperture : float
        Blur magnitude.
    pyramidal_conv : int
        Max kernel size for the downsampling trick.
    disparity_range : (int, int)
        (min_disp, max_disp) passed to the network.
    from_stage : str
        Which network stage the intermediate result comes from.
    resume : int
        Number of leading frames to skip (already computed).
    """
    # Map the verbose level to tf logging verbosity.
    if verbose >= 2:
        tf.logging.set_verbosity(tf.logging.INFO)
    elif verbose >= 1:
        tf.logging.set_verbosity(tf.logging.WARN)
    else:
        tf.logging.set_verbosity(tf.logging.ERROR)
    # Silence tf's C++-level log spam.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Parse the input file lists; all three must have the same length.
    left_img_files = parse_files_lines(left_image_list)
    right_img_files = parse_files_lines(rigth_image_list)
    focus_planes = parse_files_lines(focus_list)
    assert (len(left_img_files) == len(right_img_files))
    assert (len(left_img_files) == len(focus_planes))
    # BUGFIX: zip() is a lazy iterator in Python 3; materialize it so it can
    # be sliced, indexed and len()-ed below.
    inputs = list(zip(left_img_files, right_img_files, focus_planes))
    # Skip the part already computed.
    inputs = inputs[resume:]
    sequence_length = len(inputs)
    tf.logging.info("Going to compute for %d frames (first %d ignored)" %
                    (sequence_length, resume))

    # Making output folder if needed.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Loop on the frames; the graph is only rebuilt when the focus plane
    # changes, since the focus plane is baked into the graph.
    prev_focus_plane = float('nan')  # NaN != anything -> first frame builds
    sess = tf.Session()
    for i in range(sequence_length):
        tf.logging.info("Processing frame %d" % (i + resume))
        left_img_file, right_img_file, focus_plane = inputs[i]
        focus_plane = float(focus_plane)
        # Open files, add a batch dimension and normalize to [0, 1] floats.
        tf.logging.info("Opening Images")
        left_img = np.expand_dims(Image.open(left_img_file), 0).astype(
            np.float32) / 255.0
        rigth_image = np.expand_dims(Image.open(right_img_file), 0).astype(
            np.float32) / 255.0
        # Frame size (assumed constant over the sequence).
        h, w = left_img.shape[1:3]
        if prev_focus_plane != focus_plane:
            tf.logging.info("Change in focus plane detected, rebuilding graph")
            tf.reset_default_graph()
            # Frame placeholders.
            left_img_ph = tf.placeholder(tf.float32, shape=[1, h, w, 3])
            rigth_img_ph = tf.placeholder(tf.float32, shape=[1, h, w, 3])
            # Build the refocusing graph for this focus plane.
            net_output_ph = refnet_blur_baseline(
                left_img_ph,
                rigth_img_ph,
                target_disparities=[focus_plane],
                blur_magnitude=aperture,
                is_training=False,
                stop_grads=True,
                min_disp=disparity_range[0],
                max_disp=disparity_range[1],
                downsampling_trick_max_kernel_size=pyramidal_conv,
                from_stage=from_stage)
            # The old session is bound to the graph we just reset; reopen.
            sess.close()
            sess = tf.Session()
            # NOTE: could avoid having to reload weights here.
            tf.logging.info("Loading stereonet weigts")
            rv = optimistic_restore(sess, stereonet_checkpoint)
            tf.logging.info("Restored %d vars" % rv)
            # BUGFIX: remember the current focus plane; previously this was
            # never updated, so the graph was rebuilt (and the weights
            # reloaded) for every single frame.
            prev_focus_plane = focus_plane
        # Run depth estimation and refocusing for this frame.
        tf.logging.info("Runing depth estimation and refocusing")
        refocus_image, intermediate_result, _ = sess.run(net_output_ph,
                                                         feed_dict={
                                                             left_img_ph:
                                                             left_img,
                                                             rigth_img_ph:
                                                             rigth_image
                                                         })
        tf.logging.info("Done")

        tf.logging.info("Saving to " + output_folder +
                        "/refocused_[left_image_file_name]s")

        Image.fromarray((refocus_image[0][0, :, :, :] * 255).astype(
            np.uint8)).save(output_folder + "/refocused_%d.png" % (i + resume))
        if intermediate:
            tf.logging.info("Saving intermediate output")
            if from_stage == "disparity_map":
                # Min-max normalize the disparity map and tile to 3 channels.
                disp = intermediate_result[0, :, :, :]
                disp = (disp - np.amin(disp)) / (np.amax(disp) - np.amin(disp))
                disp = np.tile(disp, [1, 1, 3])
                Image.fromarray((disp * 255).astype(
                    np.uint8)).save(output_folder + "/disparity_%d.png" %
                                    (i + resume))
            else:
                # NOTE(review): hard-coded disparity labels — presumably the
                # cost volume has 18 slices at stride 8; confirm with network.
                disparity_range = np.arange(
                    1, 18 + 1) * 8  #FIXME: see ben for disp=0
                for d in range(intermediate_result.shape[-1]):
                    # BUGFIX: the file name was missing its ".png" extension
                    # (inconsistent with the single-image variant).
                    Image.fromarray(
                        (intermediate_result[0, :, :, d] * 255).astype(
                            np.uint8)).save(output_folder +
                                            "/conf_volume_%d_%d.png" %
                                            (disparity_range[d], i + resume))
    # BUGFIX: close the final session; it was only ever closed on rebuilds.
    sess.close()
コード例 #2
0
ファイル: demo.py プロジェクト: KevinJ-Huang/noah-research
def do_blur(left_image, rigth_image, stereonet_checkpoint, output_folder,
            intermediate, verbose, timming, deploy, focus_plane, aperture,
            pyramidal_conv, disparity_range, from_stage):
    """Refocuses a single stereo pair at one or more focus planes.

    Parameters
    ----------
    left_image, rigth_image : str
        Paths to the left/right input images.
    stereonet_checkpoint : str
        Checkpoint to restore the stereonet weights from.
    output_folder : str
        Folder where the refocused image(s) and intermediates are written.
    intermediate : bool
        Also save the intermediate result (disparity map or cost volume).
    verbose : int
        0 = errors only, 1 = warnings, 2 = info.
    timming : bool
        Measure and log average run times of the pipeline stages.
    deploy : bool
        Export a SavedModel of the graph to `output_folder/deployable`.
    focus_plane : sequence of float
        One or more focus planes (disparities) to refocus at.
    aperture : float
        Blur magnitude.
    pyramidal_conv : int
        Max kernel size for the downsampling trick.
    disparity_range : (int, int)
        (min_disp, max_disp) passed to the network.
    from_stage : str
        Which network stage the intermediate result comes from.
    """
    # Map the verbose level to tf logging verbosity.
    if verbose >= 2:
        tf.logging.set_verbosity(tf.logging.INFO)
    elif verbose >= 1:
        tf.logging.set_verbosity(tf.logging.WARN)
    else:
        tf.logging.set_verbosity(tf.logging.ERROR)
    # Silence tf's C++-level log spam.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Open the images, add a batch dimension and normalize to [0, 1] floats.
    tf.logging.info("Opening Images")
    left_img = np.expand_dims(Image.open(left_image), 0).astype(
        np.float32) / 255.0
    rigth_image = np.expand_dims(Image.open(rigth_image), 0).astype(
        np.float32) / 255.0
    h, w = left_img.shape[1:3]
    # Making output folder if needed.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Building the graph; the focus planes are baked into it.
    tf.logging.info("Making graph for %d focal planes" % len(focus_plane))
    left_img_ph = tf.placeholder(tf.float32, shape=[1, h, w, 3])
    rigth_img_ph = tf.placeholder(tf.float32, shape=[1, h, w, 3])
    net_output_ph = refnet_blur_baseline(
        left_img_ph,
        rigth_img_ph,
        target_disparities=focus_plane,
        blur_magnitude=aperture,
        is_training=False,
        stop_grads=True,
        min_disp=disparity_range[0],
        max_disp=disparity_range[1],
        downsampling_trick_max_kernel_size=pyramidal_conv,
        from_stage=from_stage)
    # Run everything inside a single session.
    with tf.Session() as sess:
        tf.logging.info("Loading stereonet weigts")
        rv = optimistic_restore(sess, stereonet_checkpoint)
        tf.logging.info("Restored %d vars" % rv)

        if deploy:
            tf.logging.info("Saving model and weigts for later deployement")
            refocus_image_ph = net_output_ph[0]
            # BUGFIX: stray debug print() to stdout replaced with tf.logging
            # so it honors the verbosity setting above.
            tf.logging.debug(str(refocus_image_ph))
            tf.saved_model.simple_save(
                sess,
                output_folder + "/deployable",
                inputs={
                    "left_img": left_img_ph,
                    "rigth_img": rigth_img_ph
                },
                outputs={"refocus_image": refocus_image_ph[0]})
            tf.logging.info("Done")

        tf.logging.info("Runing depth estimation and refocusing")
        refocus_image, intermediate_result, _ = sess.run(net_output_ph,
                                                         feed_dict={
                                                             left_img_ph:
                                                             left_img,
                                                             rigth_img_ph:
                                                             rigth_image
                                                         })
        tf.logging.info("Done")

        if timming:
            # Time the full pipeline and the intermediate stage separately;
            # their difference is the cost of the refocusing alone.
            tf.logging.info("Getting timing")
            feed_dict = {left_img_ph: left_img, rigth_img_ph: rigth_image}
            avg_timing = time_up_to(sess, net_output_ph[0], feed_dict)
            avg_timing_intermediate = time_up_to(sess, net_output_ph[1],
                                                 feed_dict)
            tf.logging.info(
                "Average timming for pipeline is %fs, including %fs to compute the disparity/cost volume and %fs for refocusing"
                % (avg_timing, avg_timing_intermediate,
                   avg_timing - avg_timing_intermediate))

        tf.logging.info("Saving to " + output_folder +
                        "/refocused_image_[focus_plane].png")
        # One output image per requested focus plane.
        for i in range(len(focus_plane)):
            f = focus_plane[i]
            Image.fromarray((refocus_image[i][0, :, :, :] * 255).astype(
                np.uint8)).save(output_folder + "/refocused_image_%f.png" % f)
        if intermediate:
            tf.logging.info("Saving intermediate output")
            if from_stage == "disparity_map":
                # Min-max normalize the disparity map and tile to 3 channels.
                disp = intermediate_result[0, :, :, :]
                disp = (disp - np.amin(disp)) / (np.amax(disp) - np.amin(disp))
                disp = np.tile(disp, [1, 1, 3])
                Image.fromarray((disp * 255).astype(
                    np.uint8)).save(output_folder + "/disparity.png")
            else:
                # NOTE(review): hard-coded disparity labels — presumably the
                # cost volume has 18 slices at stride 8; confirm with network.
                disparity_range = np.arange(
                    1, 18 + 1) * 8  #FIXME: see ben for disp=0
                # BUGFIX: stray debug print()s to stdout replaced with
                # tf.logging so they honor the verbosity setting.
                tf.logging.debug("conf volume min: %f" %
                                 np.amin(intermediate_result))
                tf.logging.debug("conf volume max: %f" %
                                 np.amax(intermediate_result))

                for d in range(intermediate_result.shape[-1]):
                    Image.fromarray(
                        (intermediate_result[0, :, :, d] * 255).astype(
                            np.uint8)).save(output_folder +
                                            "/conf_volume_%d.png" %
                                            disparity_range[d])
コード例 #3
0
ファイル: demo.py プロジェクト: KevinJ-Huang/noah-research
def do_depth(left_image, rigth_image, stereonet_checkpoint, output_folder,
             intermediate, verbose, timming):
    """Estimates a disparity map for a stereo pair and saves it as an image.

    Parameters
    ----------
    left_image, rigth_image : str
        Paths to the left/right input images.
    stereonet_checkpoint : str
        Checkpoint to restore the stereonet weights from.
    output_folder : str
        Folder where the disparity image(s) are written.
    intermediate : bool
        Also save the lower-resolution (1/2, 1/4, 1/8) disparity maps.
    verbose : int
        0 = errors only, 1 = warnings, 2 = info.
    timming : bool
        Measure and log average run times of each network stage.
    """
    # Map the verbose level to tf logging verbosity.
    if verbose >= 2:
        tf.logging.set_verbosity(tf.logging.INFO)
    elif verbose >= 1:
        tf.logging.set_verbosity(tf.logging.WARN)
    else:
        tf.logging.set_verbosity(tf.logging.ERROR)
    # Remove verbose bits from tf (left disabled in the original).
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Open the images, add a batch dimension and normalize to [0, 1] floats.
    tf.logging.info("Opening Images")
    left_img = np.expand_dims(Image.open(left_image), 0).astype(
        np.float32) / 255.0
    rigth_image = np.expand_dims(Image.open(rigth_image), 0).astype(
        np.float32) / 255.0

    h, w = left_img.shape[1:3]
    tf.logging.info("Image size %d %d" % (h, w))

    # Making output folder if needed.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Building the graph.
    tf.logging.info("Making graph")
    left_img_ph = tf.placeholder(tf.float32, shape=[1, h, w, 3])
    rigth_img_ph = tf.placeholder(tf.float32, shape=[1, h, w, 3])
    # NOTE(review): is_training=True at inference time looks suspicious —
    # confirm stereonet's flag semantics (batch-norm behavior etc.).
    net_output_ph = stereonet(left_img_ph, rigth_img_ph, is_training=True)

    # Runs the network.
    with tf.Session() as sess:
        tf.logging.info("Loading stereonet weigts")
        rv = optimistic_restore(sess, stereonet_checkpoint)
        tf.logging.info("Restored %d vars" % rv)
        tf.logging.info("Runing depth estimation")
        full_res_disparity_map, intermediate_results = sess.run(
            net_output_ph,
            feed_dict={
                left_img_ph: left_img,
                rigth_img_ph: rigth_image
            })
        tf.logging.info("Done")

        if timming:
            tf.logging.info("Getting timing")

            feed_dict = {left_img_ph: left_img, rigth_img_ph: rigth_image}

            # Average time to reach each stage of the pipeline, in pipeline
            # order (the last entry is the full-resolution output).
            stages = [
                ("raw input", net_output_ph[1]["input_left_raw"]),
                ("processed input",
                 net_output_ph[1]["input_left_preprocess"]),
                ("image features", net_output_ph[1]["tower_feature_right"]),
                ("coarse cost volume",
                 net_output_ph[1]["cost_volume_left_view_coarse"]),
                ("cost volume", net_output_ph[1]["cost_volume_left_view"]),
                ("disparity at 1/8", net_output_ph[1]["disparity_map_1_8"]),
                ("disparity at 1/4", net_output_ph[1]["disparity_map_1_4"]),
                ("disparity at 1/2", net_output_ph[1]["disparity_map_1_2"]),
                ("disparity at 1/1", net_output_ph[0]),
            ]
            timings = [(name, time_up_to(sess, target, feed_dict))
                       for name, target in stages]

            # Cumulative time up to each stage.
            tf.logging.info("Timeline:")
            for name, t in timings:
                tf.logging.info("%fs to get to the %s" % (t, name))

            # Per-stage cost (difference with the previous stage).
            tf.logging.info("Differencial:")
            prev_t = 0.0
            for name, t in timings:
                tf.logging.info("%fs to get to the %s" % (t - prev_t, name))
                prev_t = t

        # BUGFIX: the log message claimed "/disparity_image.png" but the file
        # is actually saved as "/disparity.png" below.
        tf.logging.info("Saving to " + output_folder + "/disparity.png")

        def disp2im(disp):
            # Min-max normalize and tile to a 3-channel uint8 image.
            disp = disp[0, :, :, :]
            disp = (disp - np.amin(disp)) / (np.amax(disp) - np.amin(disp))
            return (np.tile(disp, [1, 1, 3]) * 255).astype(np.uint8)

        Image.fromarray(disp2im(full_res_disparity_map)).save(output_folder +
                                                              "/disparity.png")

        if intermediate:
            tf.logging.info("Saving intermediate output")
            # NOTE(review): output names are inconsistent ("disparity_1_2.png"
            # vs "disparity_map_1_4.png") — kept as-is for compatibility.
            Image.fromarray(
                disp2im(intermediate_results["disparity_map_1_2"])).save(
                    output_folder + "/disparity_1_2.png")
            Image.fromarray(
                disp2im(intermediate_results["disparity_map_1_4"])).save(
                    output_folder + "/disparity_map_1_4.png")
            Image.fromarray(
                disp2im(intermediate_results["disparity_map_1_8"])).save(
                    output_folder + "/disparity_map_1_8.png")