def renderWithError(stepsize, prevImage):
    """Render once with the given step size and compare against ``prevImage``.

    Relies on ``rendererArgs``, ``timer``, ``volume``, ``output``,
    ``STEPPING_KERNEL`` and ``blendToWhite`` from the surrounding scope
    (they are not defined inside this function).

    Args:
        stepsize: value assigned to ``rendererArgs.stepsize`` before rendering.
        prevImage: reference image the blended result is compared with.

    Returns:
        A tuple ``(error, elapsed_ms, blended)`` where ``error`` is the
        maximum absolute difference between ``prevImage`` and the blended
        render, ``elapsed_ms`` is the value reported by the timer for the
        render call, and ``blended`` is the output of ``blendToWhite``.
    """
    rendererArgs.stepsize = stepsize

    # Only the render call itself sits between start() and stop().
    timer.start()
    pyrenderer.render(STEPPING_KERNEL, volume, rendererArgs, output)
    timer.stop()

    rendered = np.array(output.copy_to_cpu())
    elapsed_ms = timer.elapsed_ms()

    blended = blendToWhite(rendered)
    deviation = np.abs(prevImage - blended)
    max_error = np.max(deviation)

    print("step size", stepsize, "-> error", max_error)
    return max_error, elapsed_ms, blended
# Example no. 2
# 0
def runTimings(mode: str):
    """Render the implicit sphere volume with every DVR kernel and record results.

    After the shared setup (settings file, density/opacity test cases, volume
    upload) the work performed depends on ``mode``:

    * ``"visualize"``: render every kernel/density combination once, write
      each image as PNG into ``OUTPUT_IMAGE_PATH`` and all raw images into
      ``raw.npz`` in the same folder.
    * ``"measure"``: render ``TIMING_STEPS`` camera rotations for every
      kernel/density combination and write per-frame timings with PSNR,
      average timings, and histograms of the absolute difference to the
      first kernel (the baseline) as TSV files.
    * ``"instrumentation"``: recompile the kernels with instrumentation
      enabled and write the averaged counters per kernel/density
      combination as TSV.

    Any other value runs only the setup and returns without rendering.

    Args:
        mode: one of ``"visualize"``, ``"measure"`` or ``"instrumentation"``.
    """
    # CONFIGURATION
    SETTINGS_FILE = "../scenes/dvrSphere-scene1.json"
    ROOT_PATH = ".."
    RESOLUTION = (512, 512)
    VOLUME_RESOLUTION = 128
    # Each entry: (kernel name handed to pyrenderer, label used in the output
    # files, value assigned to rendererArgs.stepsize).
    KERNEL_NAMES = [
        # NOTE(review): the label says 0.0001 but the step size is 0.001 --
        # confirm which of the two is intended for the baseline.
        ("DVR: DDA - fixed step (control points)",
         "dvr stepping 0.0001\n(Baseline)", 0.001),
        ("DVR: Fixed step size - trilinear", "stepping 0.1", 0.1),
        ("DVR: Fixed step size - trilinear", "stepping 0.01", 0.01),
        ("DVR: Fixed step size - trilinear", "stepping 0.001", 0.001),
        ("DVR: DDA - interval simple", "interval - simple", 1),
        ("DVR: DDA - interval stepping (3)", "interval - stepping-3", 1),
        ("DVR: DDA - interval trapezoid (2)", "interval - trapezoid-2", 1),
        ("DVR: DDA - interval trapezoid (4)", "interval - trapezoid-4", 1),
        ("DVR: DDA - interval trapezoid (10)", "interval - trapezoid-10", 1),
        ("DVR: DDA - interval Simpson (2)", "interval - Simpson-2", 1),
        ("DVR: DDA - interval Simpson (4)", "interval - Simpson-4", 1),
        ("DVR: DDA - interval Simpson (10)", "interval - Simpson-10", 1),
        ("DVR: DDA - interval Simpson adapt",
         "interval - Simpson-adaptive e-3", 1e-3 * VOLUME_RESOLUTION),
        ("DVR: DDA - interval Simpson adapt",
         "interval - Simpson-adaptive e-5", 1e-5 * VOLUME_RESOLUTION),
        #("DVR: DDA - interval trapezoid var", "interval - trapezoid-var 0.1", 0.1),
        #("DVR: DDA - interval trapezoid var", "interval - trapezoid-var 0.01", 0.01),
        #("DVR: DDA - interval trapezoid var", "interval - trapezoid-var 0.001", 0.001),
        ("DVR: Marching Cubes", "marching cubes 1", 1 / 1 - 0.001
         ),  # number of subdivisions
        ("DVR: Marching Cubes", "marching cubes 2", 1 / 2 - 0.001),
        ("DVR: Marching Cubes", "marching cubes 4", 1 / 4 - 0.001),
        ("DVR: Marching Cubes", "marching cubes 8", 1 / 8 - 0.001),
        ("DVR: Marching Cubes", "marching cubes 16", 1 / 16 - 0.001),
    ]
    DENSITY_STEPS = 7
    MIN_DENSITY_DIFFERENCE = 0.005  # minimal difference between min and max density
    TIMING_STEPS = 50
    OUTPUT_STATS_ALL = "../results/statistics/dvr-sphere/timings-all-%s.tsv"
    OUTPUT_STATS_AVG = "../results/statistics/dvr-sphere/timings-avg-%s.tsv"
    OUTPUT_HISTO_ALL = "../results/statistics/dvr-sphere/histograms-%s.tsv"
    OUTPUT_HISTO_CFG = "../results/statistics/dvr-sphere/histogram-cfg-%s.tsv"
    OUTPUT_STATS_USE_DOUBLE = False
    OUTPUT_IMAGE_PATH = "../results/statistics/dvr-sphere/images/"
    OUTPUT_INSTRUMENTATION = "../results/statistics/dvr-sphere/instrumentation.tsv"
    # Suffix substituted into the "%s" of the statistics file names.
    precision = "double" if OUTPUT_STATS_USE_DOUBLE else "float"

    # Histogram bins: one bin [0, 1e-6) followed by log-spaced bins up to 1,
    # i.e. HISTO_NUM_BINS + 1 edges in total.
    HISTO_NUM_BINS = 100
    HISTO_BIN_MIN = np.log10(1e-6)
    HISTO_BIN_MAX = np.log10(1)
    HISTO_BIN_EDGES = [0.0] + list(10**np.linspace(
        HISTO_BIN_MIN, HISTO_BIN_MAX, HISTO_NUM_BINS))
    print("histogram bins:", HISTO_BIN_EDGES)

    pyrenderer.oit.set_fragment_buffer_size(2**26)
    pyrenderer.oit.set_marching_cubes_mode(
        pyrenderer.oit.MarchingCubesComputationMode.OnTheFly)
    pyrenderer.oit.set_max_fragments_per_pixel(256)
    pyrenderer.oit.set_tile_size(256)

    # Also creates the parent folder that receives the TSV files.
    os.makedirs(OUTPUT_IMAGE_PATH, exist_ok=True)

    # load settings file (the returned volume path is not needed, the volume
    # is created implicitly below)
    rendererArgs, camera, _ = pyrenderer.load_from_json(
        SETTINGS_FILE, ROOT_PATH)
    print("settings loaded")
    rendererArgs.width = RESOLUTION[0]
    rendererArgs.height = RESOLUTION[1]
    base_min_density = rendererArgs.min_density
    base_max_density = rendererArgs.max_density
    base_opacity = rendererArgs.opacity_scaling

    # create density+opacity test cases: the density window shrinks
    # (log-spaced) from the base window towards its centre while the opacity
    # is scaled up by the inverse of the window width.
    end_max_density = 0.5 * (base_min_density +
                             base_max_density) + MIN_DENSITY_DIFFERENCE
    end_min_density = 0.5 * (base_min_density +
                             base_max_density) - MIN_DENSITY_DIFFERENCE
    max_densities = np.exp(
        np.linspace(np.log(base_max_density), np.log(end_max_density),
                    DENSITY_STEPS))
    min_densities = np.exp(
        np.linspace(np.log(base_min_density), np.log(end_min_density),
                    DENSITY_STEPS))
    scaling = (base_max_density - base_min_density) / (max_densities -
                                                       min_densities)
    opacities = base_opacity * scaling
    # Value reported as "TF-Range" / 'densities' in all output files.
    tf_ranges = max_densities - base_min_density
    print("min_densities:", min_densities)
    print("max_densities:", max_densities)
    print("opacities:", opacities)

    # create volume
    print("Create Sphere")
    volume = pyrenderer.Volume.create_implicit(
        pyrenderer.ImplicitEquation.Sphere, VOLUME_RESOLUTION)
    print("Loaded volumed of resolution", volume.resolution, "and world size",
          volume.world_size)
    volume.copy_to_gpu()

    # allocate timing
    timer = pyrenderer.GpuTimer()

    def applyDensityStep(i):
        """Select the i-th density window and matching opacity on rendererArgs."""
        rendererArgs.min_density = min_densities[i]
        rendererArgs.max_density = max_densities[i]
        rendererArgs.opacity_scaling = opacities[i]

    def slugify(value):
        """Turn ``value`` into a lowercase, hyphen-separated file name stem.

        The string is NFKD-normalised, every character that is not a word
        character, whitespace or hyphen is dropped, and runs of whitespace
        and hyphens are collapsed into a single hyphen.
        """
        import re
        import unicodedata
        value = str(unicodedata.normalize('NFKD', value))
        # Raw strings: "\w" / "\s" are regex classes, not string escapes.
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        value = re.sub(r'[-\s]+', '-', value)
        return value

    if mode == "visualize":
        # allocate output
        pyrenderer.reload_kernels(enableDebugging=False,
                                  enableInstrumentation=False)
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        # Indexed as [kernel][density step].
        outputs = [[None] * DENSITY_STEPS for _ in KERNEL_NAMES]
        # Render times are collected but not written to disk in this mode.
        times = [[None] * DENSITY_STEPS for _ in KERNEL_NAMES]

        # render
        camera.update_render_args(rendererArgs)
        for j, (kernel_name, _, stepsize) in enumerate(KERNEL_NAMES):
            print("Render", kernel_name, stepsize)
            rendererArgs.stepsize = stepsize
            for i in range(DENSITY_STEPS):
                applyDensityStep(i)
                timer.start()
                pyrenderer.render(kernel_name, volume, rendererArgs, output)
                timer.stop()
                outputs[j][i] = np.array(output.copy_to_cpu())
                times[j][i] = timer.elapsed_ms()

        # visualize: one PNG per kernel/density combination
        print("Visualize")
        for j, (_, human_kernel_name, _) in enumerate(KERNEL_NAMES):
            for i in range(DENSITY_STEPS):
                filename = os.path.join(
                    OUTPUT_IMAGE_PATH,
                    slugify("%s__%d" % (human_kernel_name, i)) + ".png")
                imageio.imwrite(filename, outputs[j][i][:, :, 0:4])

        # save to numpy
        npz_output = {}
        npz_output['kernels'] = KERNEL_NAMES
        npz_output['densities'] = [
            tf_ranges[i] for i in range(DENSITY_STEPS)
        ]
        for j in range(len(KERNEL_NAMES)):
            for i in range(DENSITY_STEPS):
                npz_output['img_%d_%d' % (j, i)] = outputs[j][i]
        np.savez(os.path.join(OUTPUT_IMAGE_PATH, "raw.npz"), **npz_output)

    elif mode == "measure":
        summed_times = [[0] * DENSITY_STEPS for _ in KERNEL_NAMES]

        pyrenderer.reload_kernels(enableDebugging=False,
                                  enableInstrumentation=False,
                                  otherPreprocessorArguments=[
                                      "-DKERNEL_USE_DOUBLE=%s" %
                                      ("1" if OUTPUT_STATS_USE_DOUBLE else "0")
                                  ])
        # allocate output
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        # Baseline images of kernel 0, indexed as [density step][frame].
        outputs = [[None] * TIMING_STEPS for _ in range(DENSITY_STEPS)]

        # Error histograms, indexed as [kernel][density step].
        histograms = [[
            np.zeros(HISTO_NUM_BINS, dtype=np.int64)
            for _ in range(DENSITY_STEPS)
        ] for _ in KERNEL_NAMES]

        # render and write output
        with open(OUTPUT_STATS_ALL % precision, "w") as f:
            f.write("Kernel Name\tTF-Range\tFrame\tTime (ms)\tPSNR (dB)\n")
            for j, (kernel_name, human_kernel_name,
                    stepsize) in enumerate(KERNEL_NAMES):
                print("Render", kernel_name, stepsize)
                rendererArgs.stepsize = stepsize
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    applyDensityStep(i)
                    histogram = histograms[j][i]
                    for k in range(TIMING_STEPS):
                        # One full camera rotation spread over TIMING_STEPS frames.
                        camera.yaw = k * 360 / TIMING_STEPS
                        camera.update_render_args(rendererArgs)
                        timer.start()
                        pyrenderer.render(kernel_name, volume, rendererArgs,
                                          output)
                        timer.stop()
                        out_img = np.array(output.copy_to_cpu())
                        if j == 0:  # baseline
                            outputs[i][k] = out_img
                            psnr = 0
                        else:
                            reference = outputs[i][k][:, :, 0:4]
                            delta = reference - out_img[:, :, 0:4]
                            # compute psnr (infinite if the images are identical)
                            maxValue = np.max(reference)
                            mse = (delta**2).mean(axis=None)
                            psnr = 20 * np.log10(maxValue) - 10 * np.log10(mse)
                            # compute histogram of the absolute error; the
                            # bins start at 0, so signed differences would
                            # lose every negative sample.
                            new_histogram, _ = np.histogram(
                                np.abs(delta), bins=HISTO_BIN_EDGES)
                            histogram += new_histogram
                        t = timer.elapsed_ms()
                        summed_times[j][i] += t
                        f.write("%s\t%.4f\t%d\t%.4f\t%.4f\n" %
                                (label, tf_ranges[i], k, t, psnr))

        # write average stats
        with open(OUTPUT_STATS_AVG % precision, "w") as f:
            f.write("Kernel Name\tTF-Range\tAvg-Time (ms)\n")
            for j, (_, human_kernel_name, _) in enumerate(KERNEL_NAMES):
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    f.write("%s\t%.4f\t%.4f\n" %
                            (label, tf_ranges[i],
                             summed_times[j][i] / TIMING_STEPS))

        # write histograms: one row per bin, one column per "kernel-density"
        # configuration id
        with open(OUTPUT_HISTO_ALL % precision, "w") as f:
            f.write("BinStart\tBinEnd")
            for j in range(len(KERNEL_NAMES)):
                for i in range(DENSITY_STEPS):
                    f.write("\t%d-%d" % (j, i))
            f.write("\n")
            for b in range(HISTO_NUM_BINS):
                f.write("%.10f\t%.10f" %
                        (HISTO_BIN_EDGES[b], HISTO_BIN_EDGES[b + 1]))
                for j in range(len(KERNEL_NAMES)):
                    for i in range(DENSITY_STEPS):
                        f.write("\t%d" % histograms[j][i][b])
                f.write("\n")
        # write the mapping from configuration id to kernel label and range
        with open(OUTPUT_HISTO_CFG % precision, "w") as f:
            f.write("Kernel Name\tTF-Range\tConfig-ID\n")
            for j, (_, human_kernel_name, _) in enumerate(KERNEL_NAMES):
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    f.write("%s\t%.4f\t%s\n" %
                            (label, tf_ranges[i], "%d-%d" % (j, i)))

    elif mode == "instrumentation":
        # recompile with instrumentation
        pyrenderer.reload_kernels(enableInstrumentation=True)

        # allocate output
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)

        # Counters read from the per-render instrumentation result.
        fields = [
            "densityFetches", "tfFetches", "ddaSteps", "isoIntersections",
            "intervalEval", "intervalStep", "intervalMaxStep"
        ]

        # render
        with open(OUTPUT_INSTRUMENTATION, "w") as f:
            f.write(
                "Kernel Name\tTF-Range\t%s\tavgIntervalSteps\tnumTriangles\tnumFragments\n"
                % "\t".join(fields))
            camera.update_render_args(rendererArgs)
            for j, (kernel_name, human_kernel_name,
                    stepsize) in enumerate(KERNEL_NAMES):
                print("Render", kernel_name, stepsize)
                rendererArgs.stepsize = stepsize
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    applyDensityStep(i)
                    instrumentations, globalInstrumentations = \
                        pyrenderer.render_with_instrumentation(
                            kernel_name, volume, rendererArgs, output)
                    f.write("%s\t%s" % (label, tf_ranges[i]))
                    for field in fields:
                        f.write(
                            "\t%.2f" %
                            np.mean(instrumentations[field], dtype=np.float32))
                    # NOTE(review): entries with intervalEval == 0 produce
                    # NaN/inf in this ratio -- confirm that is acceptable.
                    avgIntervalSteps = np.mean(
                        instrumentations["intervalStep"].astype(np.float32) /
                        instrumentations["intervalEval"].astype(np.float32))
                    f.write(
                        "\t%.2f\t%d\t%d\n" %
                        (avgIntervalSteps, globalInstrumentations.numTriangles,
                         globalInstrumentations.numFragments))
                    # Flush per row so partial results survive an aborted run.
                    f.flush()
def runTimings(mode: str):
    """Render the Marschner-Lobb volume with every DVR kernel and record results.

    After the shared setup (settings file, multi-iso transfer function,
    volume upload) the work performed depends on ``mode``:

    * ``"visualize"``: render every kernel/width combination once, write
      each image as PNG into ``OUTPUT_IMAGE_PATH`` and all raw images into
      ``raw.npz`` in the same folder.
    * ``"measure"``: render ``TIMING_STEPS`` camera rotations for every
      kernel/width combination and write per-frame timings with PSNR,
      average timings, and histograms of the absolute difference to the
      first kernel (the baseline) as TSV files.
    * ``"instrumentation"``: recompile the kernels with instrumentation
      enabled and write the averaged counters per kernel/width combination
      as TSV.

    Any other value runs only the setup and returns without rendering.

    Args:
        mode: one of ``"visualize"``, ``"measure"`` or ``"instrumentation"``.
    """
    # CONFIGURATION
    SETTINGS_FILE = "../scenes/dvrLobb-scene4.json"
    ROOT_PATH = ".."
    RESOLUTION = (512, 512)
    VOLUME_RESOLUTION = 128
    # Both lists hold (kernel name, label, step size) entries at matching
    # indices; the timing list names the kernel used for the timed render.
    KERNEL_NAMES_NORMAL, KERNEL_NAMES_TIMING = utils.getKernelNames(
        VOLUME_RESOLUTION)
    DENSITY_STEPS = 10
    WIDTH_START = 0.05
    WIDTH_END = 0.002
    SCALING_START = 20.0
    EXPONENT = 0.1
    TIMING_STEPS = utils.TIMING_STEPS

    OUTPUT_STATS_ALL = "../results/statistics/dvr-marschner-lobb/timings-all-%s.tsv"
    OUTPUT_STATS_AVG = "../results/statistics/dvr-marschner-lobb/timings-avg-%s.tsv"
    OUTPUT_HISTO_ALL = "../results/statistics/dvr-marschner-lobb/histograms-%s.tsv"
    OUTPUT_HISTO_CFG = "../results/statistics/dvr-marschner-lobb/histogram-cfg-%s.tsv"
    OUTPUT_STATS_USE_DOUBLE = False
    OUTPUT_IMAGE_PATH = "../results/statistics/dvr-marschner-lobb/images/"
    OUTPUT_INSTRUMENTATION = "../results/statistics/dvr-marschner-lobb/instrumentation.tsv"
    # Suffix substituted into the "%s" of the statistics file names.
    precision = "double" if OUTPUT_STATS_USE_DOUBLE else "float"

    # Histogram bins: one bin [0, 1e-6) followed by log-spaced bins up to 1,
    # i.e. HISTO_NUM_BINS + 1 edges in total.
    HISTO_NUM_BINS = 100
    HISTO_BIN_MIN = np.log10(1e-6)
    HISTO_BIN_MAX = np.log10(1)
    HISTO_BIN_EDGES = [0.0] + list(10**np.linspace(
        HISTO_BIN_MIN, HISTO_BIN_MAX, HISTO_NUM_BINS))
    print("histogram bins:", HISTO_BIN_EDGES)

    pyrenderer.oit.set_fragment_buffer_size(2**26)
    pyrenderer.oit.set_marching_cubes_mode(
        pyrenderer.oit.MarchingCubesComputationMode.OnTheFly)
    pyrenderer.oit.set_max_fragments_per_pixel(512)
    pyrenderer.oit.set_tile_size(128)

    # Also creates the parent folder that receives the TSV files.
    os.makedirs(OUTPUT_IMAGE_PATH, exist_ok=True)

    # load settings file (the returned volume path is not needed, the volume
    # is created implicitly below)
    rendererArgs, camera, _ = pyrenderer.load_from_json(
        SETTINGS_FILE, ROOT_PATH)
    print("settings loaded")
    rendererArgs.width = RESOLUTION[0]
    rendererArgs.height = RESOLUTION[1]
    camera.update_render_args(rendererArgs)
    multiiso_tf = rendererArgs.get_multiiso_tf()

    # create volume
    print("Create Marschner Lobb")
    volume = pyrenderer.Volume.create_implicit(
        pyrenderer.ImplicitEquation.MarschnerLobb, VOLUME_RESOLUTION)
    print("Loaded volumed of resolution", volume.resolution, "and world size",
          volume.world_size)
    volume.copy_to_gpu()

    def computeWidth(time):
        """Interpolate the width between WIDTH_START and WIDTH_END.

        The interpolation runs on the values raised to ``EXPONENT`` and the
        result is raised to ``1 / EXPONENT`` again.
        """
        return pow(
            utils.lerp(time, pow(WIDTH_START, EXPONENT),
                       pow(WIDTH_END, EXPONENT)), 1 / EXPONENT)

    def computeSettings(time, rendererArgs):
        """Return ``(settings, width)`` for the interpolation parameter ``time``.

        ``settings`` is a clone of ``rendererArgs`` with a linear transfer
        function derived from the multi-iso one for ``width`` and with the
        opacity scaled inversely to ``width``.
        """
        settings = rendererArgs.clone()
        width = computeWidth(time)
        scaling = SCALING_START * WIDTH_START / width
        settings.set_linear_tf(utils.multiIso2Linear(multiiso_tf, width))
        settings.opacity_scaling = scaling
        return settings, width

    def slugify(value):
        """Turn ``value`` into a lowercase, hyphen-separated file name stem.

        The string is NFKD-normalised, every character that is not a word
        character, whitespace or hyphen is dropped, and runs of whitespace
        and hyphens are collapsed into a single hyphen.
        """
        import re
        import unicodedata
        value = str(unicodedata.normalize('NFKD', value))
        # Raw strings: "\w" / "\s" are regex classes, not string escapes.
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        value = re.sub(r'[-\s]+', '-', value)
        return value

    # Interpolation parameter and resulting width of every density step;
    # the width depends only on the step, not on the kernel.
    step_times = [i / (DENSITY_STEPS - 1) for i in range(DENSITY_STEPS)]
    widths = [computeWidth(t) for t in step_times]

    # allocate timing
    timer = pyrenderer.GpuTimer()

    if mode == "visualize":
        # allocate output
        pyrenderer.reload_kernels(enableDebugging=False,
                                  enableInstrumentation=False)
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        # Indexed as [kernel][density step].
        outputs = [[None] * DENSITY_STEPS for _ in KERNEL_NAMES_NORMAL]
        # Render times are collected but not written to disk in this mode.
        times = [[None] * DENSITY_STEPS for _ in KERNEL_NAMES_NORMAL]

        # render
        camera.update_render_args(rendererArgs)
        for j, (kernel_name, _, stepsize) in enumerate(KERNEL_NAMES_NORMAL):
            print("Render", kernel_name, stepsize)
            rendererArgs.stepsize = stepsize
            for i in range(DENSITY_STEPS):
                # Cloned per render so the step size set above is included.
                settings, _ = computeSettings(step_times[i], rendererArgs)
                timer.start()
                pyrenderer.render(kernel_name, volume, settings, output)
                timer.stop()
                outputs[j][i] = np.array(output.copy_to_cpu())
                times[j][i] = timer.elapsed_ms()

        # visualize: one PNG per kernel/width combination
        print("Visualize")
        for j, (_, human_kernel_name, _) in enumerate(KERNEL_NAMES_NORMAL):
            for i in range(DENSITY_STEPS):
                filename = os.path.join(
                    OUTPUT_IMAGE_PATH,
                    slugify("%s__%d" % (human_kernel_name, i)) + ".png")
                imageio.imwrite(filename, outputs[j][i][:, :, 0:4])

        # save to numpy
        npz_output = {}
        npz_output['kernels'] = KERNEL_NAMES_NORMAL
        npz_output['widths'] = list(widths)
        for j in range(len(KERNEL_NAMES_NORMAL)):
            for i in range(DENSITY_STEPS):
                npz_output['img_%d_%d' % (j, i)] = outputs[j][i]
        np.savez(os.path.join(OUTPUT_IMAGE_PATH, "raw.npz"), **npz_output)

    elif mode == "measure":
        summed_times = [[0] * DENSITY_STEPS for _ in KERNEL_NAMES_NORMAL]

        pyrenderer.reload_kernels(enableDebugging=False,
                                  enableInstrumentation=False,
                                  otherPreprocessorArguments=[
                                      "-DKERNEL_USE_DOUBLE=%s" %
                                      ("1" if OUTPUT_STATS_USE_DOUBLE else "0")
                                  ])
        # allocate output
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        # Baseline images of kernel 0, indexed as [density step][frame].
        outputs = [[None] * TIMING_STEPS for _ in range(DENSITY_STEPS)]

        # Error histograms, indexed as [kernel][density step].
        histograms = [[
            np.zeros(HISTO_NUM_BINS, dtype=np.int64)
            for _ in range(DENSITY_STEPS)
        ] for _ in KERNEL_NAMES_NORMAL]

        # render and write output
        with open(OUTPUT_STATS_ALL % precision, "w") as f:
            f.write("Kernel Name\tWidth\tFrame\tTime (ms)\tPSNR (dB)\n")
            for j, (kernel_name_normal, human_kernel_name,
                    stepsize) in enumerate(KERNEL_NAMES_NORMAL):
                kernel_name_timings = KERNEL_NAMES_TIMING[j][0]
                print("Render", kernel_name_normal, stepsize)
                rendererArgs.stepsize = stepsize
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    settings, width = computeSettings(step_times[i],
                                                      rendererArgs)
                    print("  width=%.4f, scaling=%.4f" %
                          (width, settings.opacity_scaling))
                    histogram = histograms[j][i]
                    for k in range(TIMING_STEPS):
                        # One full camera rotation spread over TIMING_STEPS frames.
                        camera.yaw = k * 360 / TIMING_STEPS
                        camera.update_render_args(settings)
                        timer.start()
                        pyrenderer.render(kernel_name_timings, volume,
                                          settings, output)
                        timer.stop()
                        # If the timed kernel differs from the normal one,
                        # render again (untimed) with the normal kernel so
                        # the image used for the error metrics comes from it.
                        if kernel_name_timings != kernel_name_normal:
                            pyrenderer.render(kernel_name_normal, volume,
                                              settings, output)
                        out_img = np.array(output.copy_to_cpu())
                        if j == 0:  # baseline
                            outputs[i][k] = out_img
                            psnr = 0
                        else:
                            delta = (outputs[i][k][:, :, 0:4] -
                                     out_img[:, :, 0:4])
                            # compute psnr against a fixed peak value of 1.0
                            # (infinite if the images are identical)
                            maxValue = 1.0
                            mse = (delta**2).mean(axis=None)
                            psnr = 20 * np.log10(maxValue) - 10 * np.log10(mse)
                            # compute histogram of the absolute error
                            new_histogram, _ = np.histogram(
                                np.abs(delta), bins=HISTO_BIN_EDGES)
                            histogram += new_histogram
                        t = timer.elapsed_ms()
                        summed_times[j][i] += t
                        f.write("%s\t%.4f\t%d\t%.4f\t%.4f\n" %
                                (label, width, k, t, psnr))

        # write average stats
        with open(OUTPUT_STATS_AVG % precision, "w") as f:
            f.write("Kernel Name\tWidth\tAvg-Time (ms)\n")
            for j, (_, human_kernel_name, _) in enumerate(KERNEL_NAMES_NORMAL):
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    f.write("%s\t%.4f\t%.4f\n" %
                            (label, widths[i],
                             summed_times[j][i] / TIMING_STEPS))

        # write histograms: one row per bin, one column per "kernel-width"
        # configuration id
        with open(OUTPUT_HISTO_ALL % precision, "w") as f:
            f.write("BinStart\tBinEnd")
            for j in range(len(KERNEL_NAMES_NORMAL)):
                for i in range(DENSITY_STEPS):
                    f.write("\t%d-%d" % (j, i))
            f.write("\n")
            for b in range(HISTO_NUM_BINS):
                f.write("%.10f\t%.10f" %
                        (HISTO_BIN_EDGES[b], HISTO_BIN_EDGES[b + 1]))
                for j in range(len(KERNEL_NAMES_NORMAL)):
                    for i in range(DENSITY_STEPS):
                        f.write("\t%d" % histograms[j][i][b])
                f.write("\n")
        # write the mapping from configuration id to kernel label and width
        with open(OUTPUT_HISTO_CFG % precision, "w") as f:
            f.write("Kernel Name\tWidth\tConfig-ID\n")
            for j, (_, human_kernel_name, _) in enumerate(KERNEL_NAMES_NORMAL):
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    f.write("%s\t%.4f\t%s\n" %
                            (label, widths[i], "%d-%d" % (j, i)))

    elif mode == "instrumentation":
        # recompile with instrumentation
        pyrenderer.reload_kernels(enableInstrumentation=True)

        # allocate output
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)

        # Counters read from the per-render instrumentation result.
        fields = [
            "densityFetches", "tfFetches", "ddaSteps", "isoIntersections",
            "intervalEval", "intervalStep", "intervalMaxStep"
        ]

        # render
        with open(OUTPUT_INSTRUMENTATION, "w") as f:
            f.write(
                "Kernel Name\tWidth\t%s\tavgIntervalSteps\tnumTriangles\tnumFragments\n"
                % "\t".join(fields))
            camera.update_render_args(rendererArgs)
            for j, (kernel_name, human_kernel_name,
                    stepsize) in enumerate(KERNEL_NAMES_NORMAL):
                print("Render", kernel_name, stepsize)
                rendererArgs.stepsize = stepsize
                label = human_kernel_name.replace("\n", " ")
                for i in range(DENSITY_STEPS):
                    settings, width = computeSettings(step_times[i],
                                                      rendererArgs)
                    instrumentations, globalInstrumentations = \
                        pyrenderer.render_with_instrumentation(
                            kernel_name, volume, settings, output)
                    f.write("%s\t%s" % (label, width))
                    for field in fields:
                        f.write(
                            "\t%.2f" %
                            np.mean(instrumentations[field], dtype=np.float32))
                    # NOTE(review): entries with intervalEval == 0 produce
                    # NaN/inf in this ratio -- confirm that is acceptable.
                    avgIntervalSteps = np.mean(
                        instrumentations["intervalStep"].astype(np.float32) /
                        instrumentations["intervalEval"].astype(np.float32))
                    f.write(
                        "\t%.2f\t%d\t%d\n" %
                        (avgIntervalSteps, globalInstrumentations.numTriangles,
                         globalInstrumentations.numFragments))
                    # Flush per row so partial results survive an aborted run.
                    f.flush()
# Example no. 4
# 0
def runTimings(mode: str):
    """Measure how fixed-step DVR kernels converge towards a fine baseline.

    The scene given by ``SETTINGS_FILE`` is rendered with every kernel in
    ``KERNEL_NAMES`` for ``DENSITY_STEPS`` progressively narrower
    transfer-function density ranges.  The first kernel serves as the
    baseline; every other kernel is compared against the baseline image of
    the same density range and frame via PSNR and a histogram of the
    absolute differences over the first four image channels.  Four TSV files
    are written: all timings, averaged timings, the histograms and a legend
    mapping histogram column ids to kernel / density range.

    Args:
        mode: Only ``"measure"`` renders and writes the statistics.  For any
            other value the function stops after loading the scene and
            copying the volume to the GPU.
    """
    # CONFIGURATION
    SETTINGS_FILE = "../scenes/bigBug1.json"
    ROOT_PATH = ".."
    RESOLUTION = (512, 512)
    # (kernel name, human readable name, step size); entry 0 is the baseline
    KERNEL_NAMES = [
        ("DVR: DDA - fixed step (control points)",
         "dvr stepping 0.0001\n(Baseline)", 0.0001)
    ] + [("DVR: Fixed step size - trilinear", "stepping 2^-%d" % s,
          pow(2, -s)) for s in range(2, 15)]
    DENSITY_STEPS = 7
    MIN_DENSITY_DIFFERENCE = 0.01  # minimal difference between min and max density
    TIMING_STEPS = 1
    OUTPUT_STATS_ALL = "../results/statistics/dvr-beetle/timingsConvergence-all-%s.tsv"
    OUTPUT_STATS_AVG = "../results/statistics/dvr-beetle/timingsConvergence-avg-%s.tsv"
    OUTPUT_HISTO_ALL = "../results/statistics/dvr-beetle/histogramsConvergence-%s.tsv"
    OUTPUT_HISTO_CFG = "../results/statistics/dvr-beetle/histogramConvergence-cfg-%s.tsv"
    OUTPUT_STATS_USE_DOUBLE = True
    os.makedirs("../results/statistics/dvr-beetle/", exist_ok=True)
    # substituted into the "%s" placeholder of every output file name
    precision = "double" if OUTPUT_STATS_USE_DOUBLE else "float"

    # Histogram bins: one bin [0, 1e-6] followed by logarithmically spaced
    # bins up to 1, i.e. HISTO_NUM_BINS + 1 edges for HISTO_NUM_BINS bins.
    HISTO_NUM_BINS = 100
    HISTO_BIN_MIN = np.log10(1e-6)
    HISTO_BIN_MAX = np.log10(1)
    HISTO_BIN_EDGES = [0.0] + list(10**np.linspace(
        HISTO_BIN_MIN, HISTO_BIN_MAX, HISTO_NUM_BINS))
    print("histogram bins:", HISTO_BIN_EDGES)

    pyrenderer.oit.set_fragment_buffer_size(2**26)
    pyrenderer.oit.set_marching_cubes_mode(
        pyrenderer.oit.MarchingCubesComputationMode.OnTheFly)
    pyrenderer.oit.set_max_fragments_per_pixel(512)
    pyrenderer.oit.set_tile_size(128)

    # load settings file
    rendererArgs, camera, volumePath = pyrenderer.load_from_json(
        SETTINGS_FILE, ROOT_PATH)
    print("settings loaded")
    rendererArgs.width, rendererArgs.height = RESOLUTION
    base_min_density = rendererArgs.min_density
    base_max_density = rendererArgs.max_density
    base_opacity = rendererArgs.opacity_scaling

    # create density+opacity test cases:
    # the maximal density shrinks logarithmically towards the minimal density
    # while the opacity is scaled up by the inverse of the remaining range
    end_max_density = base_min_density + MIN_DENSITY_DIFFERENCE
    max_densities = np.exp(
        np.linspace(np.log(base_max_density), np.log(end_max_density),
                    DENSITY_STEPS))
    scaling = (base_max_density - base_min_density) / (max_densities -
                                                       base_min_density)
    opacities = base_opacity * scaling

    # create volume
    # (the message below is historic: the volume is loaded from volumePath)
    print("Create Marschner Lobb")
    volume = pyrenderer.Volume(volumePath)
    print("Loaded volumed of resolution", volume.resolution, "and world size",
          volume.world_size)
    volume.copy_to_gpu()

    # allocate timing
    timer = pyrenderer.GpuTimer()

    if mode != "measure":
        return

    numKernels = len(KERNEL_NAMES)
    kernelLabels = [human.replace("\n", " ") for _, human, _ in KERNEL_NAMES]
    tfRanges = max_densities - base_min_density  # value of the TF-Range column
    summed_times = [[0] * DENSITY_STEPS for _ in range(numKernels)]

    pyrenderer.reload_kernels(
        enableDebugging=False,
        enableInstrumentation=False,
        otherPreprocessorArguments=[
            "-DKERNEL_USE_DOUBLE=%s" %
            ("1" if OUTPUT_STATS_USE_DOUBLE else "0")
        ])
    # allocate output for baseline
    output = pyrenderer.allocate_output(rendererArgs.width,
                                        rendererArgs.height,
                                        rendererArgs.render_mode)
    # baseline images, indexed [density step][frame]
    baseline = [[None] * TIMING_STEPS for _ in range(DENSITY_STEPS)]

    # accumulated difference histograms, indexed [kernel][density step]
    histograms = [[
        np.zeros(HISTO_NUM_BINS, dtype=np.int64)
        for _ in range(DENSITY_STEPS)
    ] for _ in range(numKernels)]

    # render and write output
    with open(OUTPUT_STATS_ALL % precision, "w") as f:
        f.write("Kernel Name\tTF-Range\tFrame\tTime (ms)\tPSNR (dB)\n")
        for j, (kernel_name, _, stepsize) in enumerate(KERNEL_NAMES):
            print("Render", kernel_name, stepsize)
            rendererArgs.stepsize = stepsize
            for i in range(DENSITY_STEPS):
                rendererArgs.max_density = max_densities[i]
                rendererArgs.opacity_scaling = opacities[i]
                for k in range(TIMING_STEPS):
                    camera.yaw = k * 360 / TIMING_STEPS
                    camera.update_render_args(rendererArgs)
                    timer.start()
                    pyrenderer.render(kernel_name, volume, rendererArgs,
                                      output)
                    timer.stop()
                    out_img = np.array(output.copy_to_cpu())
                    if j == 0:  # baseline
                        baseline[i][k] = out_img
                        psnr = 0
                    else:
                        reference = baseline[i][k][:, :, 0:4]
                        diff = reference - out_img[:, :, 0:4]
                        # compute psnr (infinite for a perfect match)
                        maxValue = np.max(reference)
                        mse = (diff**2).mean(axis=None)
                        psnr = 20 * np.log10(maxValue) - 10 * np.log10(mse)
                        # compute histogram of the error magnitude; the bins
                        # start at 0, so the signed difference would silently
                        # lose every negative deviation
                        new_histogram, _ = np.histogram(np.abs(diff),
                                                        bins=HISTO_BIN_EDGES)
                        histograms[j][i] += new_histogram
                    t = timer.elapsed_ms()
                    summed_times[j][i] += t
                    f.write("%s\t%.4f\t%d\t%.4f\t%.4f\n" %
                            (kernelLabels[j], tfRanges[i], k, t, psnr))

    # write average stats
    with open(OUTPUT_STATS_AVG % precision, "w") as f:
        f.write("Kernel Name\tTF-Range\tAvg-Time (ms)\n")
        for j in range(numKernels):
            for i in range(DENSITY_STEPS):
                f.write("%s\t%.4f\t%.4f\n" %
                        (kernelLabels[j], tfRanges[i],
                         summed_times[j][i] / TIMING_STEPS))

    # write histograms: one row per bin, one column per (kernel, density) id
    with open(OUTPUT_HISTO_ALL % precision, "w") as f:
        f.write("BinStart\tBinEnd")
        for j in range(numKernels):
            for i in range(DENSITY_STEPS):
                f.write("\t%d-%d" % (j, i))
        f.write("\n")
        for b in range(HISTO_NUM_BINS):
            f.write("%.10f\t%.10f" %
                    (HISTO_BIN_EDGES[b], HISTO_BIN_EDGES[b + 1]))
            for j in range(numKernels):
                for i in range(DENSITY_STEPS):
                    f.write("\t%d" % histograms[j][i][b])
            f.write("\n")

    # write the legend that maps the histogram column ids to their settings
    with open(OUTPUT_HISTO_CFG % precision, "w") as f:
        f.write("Kernel Name\tTF-Range\tConfig-ID\n")
        for j in range(numKernels):
            for i in range(DENSITY_STEPS):
                f.write("%s\t%.4f\t%s\n" %
                        (kernelLabels[j], tfRanges[i], "%d-%d" % (j, i)))
def renderImage(sceneFile: str, name: str, scaleIndependent: bool = False):
    """Render a scene analytically and search the matching stepping step size.

    The scene is first rendered with the analytic kernel (tolerance ``ERROR``)
    and saved as PNG below ``OUTPUT_IMAGE_PATH``.  Afterwards the stepping
    kernel is rendered with successively halved step sizes until the maximal
    absolute difference between two consecutive renderings drops below
    ``ERROR``; a binary search in log2 space then refines the step size
    inside the last bracket.  The analytic, coarse and converged measurements
    are appended as one ``SceneResult`` to ``Results`` (defined outside of
    this function).

    Args:
        sceneFile: Path of the scene JSON passed to
            ``pyrenderer.load_from_json``; its base name also names the
            output image.
        name: Label stored in the resulting ``SceneResult``.
        scaleIndependent: If true, the scale-invariant kernel pair is used
            instead of the fixed-step / DDA interval kernels.
    """
    # load scene file
    RESOLUTION = (1920, 1080)
    ROOT_PATH = ".."
    rendererArgs, camera, volumePath = pyrenderer.load_from_json(
        sceneFile, ROOT_PATH)
    print("\n\n============================\n")
    print("settings loaded:", sceneFile)
    rendererArgs.width, rendererArgs.height = RESOLUTION
    volume = pyrenderer.Volume(volumePath)
    print("Loaded volumed of resolution", volume.resolution, "and world size",
          volume.world_size)
    volume.copy_to_gpu()

    # configuration
    ERROR = 1 / 256.0
    if scaleIndependent:
        STEPPING_KERNEL = "DVR: Scale invariant - trilinear"
        ANALYTIC_KERNEL = "DVR: Scale invariant - Simpson"
    elif rendererArgs.dvrUseShading:
        STEPPING_KERNEL = "DVR: Fixed step size - trilinear"
        ANALYTIC_KERNEL = "DVR: DDA - interval Simpson shaded"
    else:
        STEPPING_KERNEL = "DVR: Fixed step size - trilinear"
        ANALYTIC_KERNEL = "DVR: DDA - interval Simpson adapt"
    print("stepping kernel:", STEPPING_KERNEL)
    print("analytic kernel:", ANALYTIC_KERNEL)
    INITIAL_STEPSIZE = 0.25
    MIN_STEPSIZE = 1e-5  # the halving search never renders below this
    BINARY_SEARCH_STEPS = 10
    os.makedirs(OUTPUT_IMAGE_PATH, exist_ok=True)
    outputImagePath = OUTPUT_IMAGE_PATH + os.path.splitext(
        os.path.basename(sceneFile))[0] + ".png"

    # allocate outputs
    timer = pyrenderer.GpuTimer()
    output = pyrenderer.allocate_output(rendererArgs.width,
                                        rendererArgs.height,
                                        rendererArgs.render_mode)
    camera.update_render_args(rendererArgs)

    # render analytic (the step size field carries the tolerance here)
    rendererArgs.stepsize = ERROR
    timer.start()
    pyrenderer.render(ANALYTIC_KERNEL, volume, rendererArgs, output)
    timer.stop()
    outputAnalytic = np.array(output.copy_to_cpu())
    outputAnalyticBlended = blendToWhite(outputAnalytic)
    timeAnalytic = timer.elapsed_ms()
    # and save
    imageio.imwrite(outputImagePath, outputAnalyticBlended)
    analyticResult = StepErrorTime(-1, ERROR, timeAnalytic)

    def renderWithError(stepsize, prevImage):
        """Render with the stepping kernel and compare against ``prevImage``.

        Returns the maximal absolute difference to ``prevImage``, the render
        time in milliseconds and the rendered image blended to white.
        """
        rendererArgs.stepsize = stepsize
        timer.start()
        pyrenderer.render(STEPPING_KERNEL, volume, rendererArgs, output)
        timer.stop()
        outputStepping = np.array(output.copy_to_cpu())
        timeStepping = timer.elapsed_ms()
        outputSteppingBlended = blendToWhite(outputStepping)
        error = np.max(np.abs(prevImage - outputSteppingBlended))
        print("step size", stepsize, "-> error", error)
        return error, timeStepping, outputSteppingBlended

    # render stepping (coarse)
    errorSteppingCoarse, timeSteppingCoarse, outputSteppingCoarse = renderWithError(
        INITIAL_STEPSIZE, outputAnalyticBlended)
    previousError = errorSteppingCoarse
    # the reported error is the mean absolute difference to the analytic image
    errorSteppingCoarse = np.mean(
        np.abs(outputSteppingCoarse - outputAnalyticBlended))
    coarseResult = StepErrorTime(INITIAL_STEPSIZE, errorSteppingCoarse,
                                 timeSteppingCoarse)

    # search until maximal error of ERROR is reached
    stepsizeUpper = INITIAL_STEPSIZE
    stepsizeLower = INITIAL_STEPSIZE / 2
    previousImage = outputSteppingCoarse
    previousTime = timeSteppingCoarse
    binarySearchSteps = BINARY_SEARCH_STEPS
    while True:
        error, currentTime, currentImage = renderWithError(
            stepsizeLower, previousImage)
        if error < ERROR:
            break
        if error > previousError * 1.2:
            print("ERROR: error increased, cancel binary search")
            # fall back to the previous rendering, including its timing so
            # that step size, error and time describe the same image
            stepsizeMid = stepsizeUpper
            error = previousError
            currentImage = previousImage
            currentTime = previousTime
            binarySearchSteps = 0
            break
        if stepsizeLower / 2 < MIN_STEPSIZE:
            # without this bound the loop never terminates when the
            # difference between consecutive renderings stays above ERROR
            print("ERROR: minimal step size reached, cancel binary search")
            stepsizeMid = stepsizeLower
            binarySearchSteps = 0
            break
        stepsizeUpper = stepsizeLower
        stepsizeLower /= 2
        previousImage = currentImage
        previousError = error
        previousTime = currentTime

    # binary search (geometric mean of the bracket = midpoint in log2 space)
    for _ in range(binarySearchSteps):
        stepsizeMid = pow(
            2, 0.5 * (np.log2(stepsizeUpper) + np.log2(stepsizeLower)))
        error, currentTime, currentImage = renderWithError(
            stepsizeMid, previousImage)
        if error < ERROR:  # increase stepsize (coarsen)
            stepsizeLower = stepsizeMid
        else:  # decrease stepsize (finer)
            stepsizeUpper = stepsizeMid
            previousImage = currentImage

    # report the same metric as for the coarse result: mean absolute
    # difference to the analytic image
    finalError = np.mean(np.abs(currentImage - outputAnalyticBlended))
    convergedResult = StepErrorTime(stepsizeMid, finalError, currentTime)
    print("Final stepsize:", stepsizeMid, "with an error of", finalError,
          "and a time of", currentTime, "ms")

    Results.append(
        SceneResult(name, analyticResult, coarseResult, convergedResult))
# Example no. 6
# 0
def runTimings(mode: str):
    """Benchmark the isosurface kernels on a generated Marschner-Lobb volume.

    Args:
        mode: Selects what is done after the scene and volume are set up:

            * ``"visualize"``: render every kernel in ``KERNEL_NAMES`` once,
              save the shaded images and a raw ``.npz`` dump below
              ``OUTPUT_IMAGE_PATH`` and show them in a matplotlib figure.
            * ``"measure"``: render every kernel in ``KERNEL_NAMES_MEASURE``
              for ``TIMING_STEPS`` camera yaw angles, compare against the
              first kernel (baseline) and write timing / error TSV files.
            * ``"instrumentation"``: recompile the kernels with
              instrumentation and write per-kernel statistics to
              ``OUTPUT_INSTRUMENTATION``.
    """
    # CONFIGURATION
    SETTINGS_FILE = "../scenes/isoLobb-scene1.json"
    ROOT_PATH = ".."
    RESOLUTION = (512, 512)
    NUM_FRAMES = 1
    VOLUME_RESOLUTION = 128
    # entries are (kernel name, human readable name, step size)
    KERNEL_NAMES = [
        ("Iso: Fixed step size - nearest", "stepping nearest 0.1", 0.1),
        ("Iso: Fixed step size - trilinear", "stepping linear 0.1", 0.1),
        ("Iso: Fixed step size - tricubic", "stepping cubic 0.1", 0.1),
        ("Iso: Fixed step size - tricubic", "stepping cubic 0.01", 0.01),
        ("Iso: Fixed step size - tricubic", "stepping cubic 0.001", 0.001),
        ("Iso: DDA - [num] Marmitt (float, stable)", "DDA linear Marmitt", 1),
        ("Iso: Cubic DDA - fixed step (no poly)",
         "DDA cubic fixed (no poly) 0.1", 0.1),
        ("Iso: Cubic DDA - fixed step (no poly)",
         "DDA cubic fixed (no poly) 0.01", 0.01),
        ("Iso: Cubic DDA - fixed step (no poly)",
         "DDA cubic fixed (no poly) 0.001", 0.001),
        ("Iso: Cubic DDA - fixed step (loop)", "DDA cubic fixed (loop) 0.1",
         0.1),
        ("Iso: Cubic DDA - fixed step (loop)", "DDA cubic fixed (loop) 0.01",
         0.01),
        ("Iso: Cubic DDA - fixed step (loop)", "DDA cubic fixed (loop) 0.001",
         0.001),
        ("Iso: Cubic DDA - fixed step (explicit)",
         "DDA cubic fixed (explicit) 0.1", 0.1),
        ("Iso: Cubic DDA - fixed step (explicit)",
         "DDA cubic fixed (explicit) 0.01", 0.01),
        ("Iso: Cubic DDA - fixed step (explicit)",
         "DDA cubic fixed (explicit) 0.001", 0.001),
        ("Iso: Cubic DDA - Sphere Simple (loop)",
         "DDA cubic Sphere Simple (loop)", 1),
        ("Iso: Cubic DDA - Sphere Bernstein (loop)",
         "DDA cubic Sphere Bernstein (loop)", 1),
        ("Iso: Cubic DDA - Sphere Simple (explicit)",
         "DDA cubic Sphere Simple (explicit)", 1),
        ("Iso: Cubic DDA - Sphere Bernstein (explicit)",
         "DDA cubic Sphere Bernstein (explicit)", 1),
    ]
    # kernels for the "measure" mode; the first entry is the baseline
    KERNEL_NAMES_MEASURE = [
        ("Iso: Cubic DDA - fixed step (no poly)", "Baseline 0.001", 0.001),
        ("Iso: Cubic DDA - fixed step (no poly)",
         "DDA cubic fixed (no poly) 0.1", 0.1),
        ("Iso: Cubic DDA - fixed step (no poly)",
         "DDA cubic fixed (no poly) 0.01", 0.01),
        ("Iso: Cubic DDA - fixed step (loop)", "DDA cubic fixed (loop) 0.1",
         0.1),
        ("Iso: Cubic DDA - fixed step (loop)", "DDA cubic fixed (loop) 0.01",
         0.01),
        ("Iso: Cubic DDA - fixed step (explicit)",
         "DDA cubic fixed (explicit) 0.1", 0.1),
        ("Iso: Cubic DDA - fixed step (explicit)",
         "DDA cubic fixed (explicit) 0.01", 0.01),
        ("Iso: Cubic DDA - Sphere Simple (loop)",
         "DDA cubic Sphere Simple (loop)", 1),
        ("Iso: Cubic DDA - Sphere Bernstein (loop)",
         "DDA cubic Sphere Bernstein (loop)", 1),
        ("Iso: Cubic DDA - Sphere Simple (explicit)",
         "DDA cubic Sphere Simple (explicit)", 1),
        ("Iso: Cubic DDA - Sphere Bernstein (explicit)",
         "DDA cubic Sphere Bernstein (explicit)", 1),
    ]
    TIMING_STEPS = 50
    OUTPUT_STATS_ALL = "../results/statistics/iso-marschner-lobb/timings-all-%s.tsv"
    OUTPUT_STATS_AVG = "../results/statistics/iso-marschner-lobb/timings-avg-%s.tsv"
    OUTPUT_HISTO_ALL = "../results/statistics/iso-marschner-lobb/histograms-%s.tsv"
    OUTPUT_HISTO_CFG = "../results/statistics/iso-marschner-lobb/histogram-cfg-%s.tsv"
    OUTPUT_STATS_USE_DOUBLE = False
    OUTPUT_IMAGE_PATH = "../results/statistics/iso-marschner-lobb/images/"
    OUTPUT_INSTRUMENTATION = "../results/statistics/iso-marschner-lobb/instrumentation.tsv"

    HISTO_NUM_BINS = 100
    HISTO_BIN_MIN = np.log10(1e-6)
    HISTO_BIN_MAX = np.log10(1)
    HISTO_BIN_EDGES = [0.0] + list(10**np.linspace(
        HISTO_BIN_MIN, HISTO_BIN_MAX, HISTO_NUM_BINS))
    print("histogram bins:", HISTO_BIN_EDGES)

    os.makedirs(OUTPUT_IMAGE_PATH, exist_ok=True)

    # load settings file
    rendererArgs, camera, volumePath = pyrenderer.load_from_json(
        SETTINGS_FILE, ROOT_PATH)
    print("settings loaded")
    rendererArgs.width = RESOLUTION[0]
    rendererArgs.height = RESOLUTION[1]

    # create volume (volumePath from the settings is not used, the volume is
    # generated instead)
    print("Create Marschner Lobb")
    volume = pyrenderer.Volume.create_marschner_lobb(VOLUME_RESOLUTION)
    print("Loaded volumed of resolution", volume.resolution, "and world size",
          volume.world_size)
    volume.copy_to_gpu()

    # allocate timing
    timer = pyrenderer.GpuTimer()
    times = [None] * len(KERNEL_NAMES)

    if mode == "visualize":
        # allocate output
        pyrenderer.reload_kernels(enableDebugging=False,
                                  enableInstrumentation=False)
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        outputs = [None] * len(KERNEL_NAMES)

        # render
        camera.update_render_args(rendererArgs)
        for j, (kernel_name, _, stepsize) in enumerate(KERNEL_NAMES):
            print("Render", kernel_name, stepsize)
            rendererArgs.stepsize = stepsize
            timer.start()
            pyrenderer.render(kernel_name, volume, rendererArgs, output)
            timer.stop()
            outputs[j] = np.array(output.copy_to_cpu())
            times[j] = timer.elapsed_ms()

        def slugify(value):
            """
            Normalizes string, converts to lowercase, removes non-alpha characters,
            and converts spaces to hyphens.
            """
            import unicodedata
            import re
            value = str(unicodedata.normalize(
                'NFKD', value))  #.encode('ascii', 'ignore'))
            # raw strings: "\w" and "\s" are invalid escapes in plain literals
            value = re.sub(r'[^\w\s-]', '', value).strip().lower()
            value = re.sub(r'[-\s]+', '-', value)
            return value

        # visualize
        print("Visualize")
        fig, axes = plt.subplots(ncols=len(KERNEL_NAMES), nrows=1)
        for j, (kernel_name, human_kernel_name, _) in enumerate(KERNEL_NAMES):
            img = screenShading(outputs[j])
            filename = os.path.join(OUTPUT_IMAGE_PATH,
                                    slugify(human_kernel_name) + ".png")
            imageio.imwrite(filename, img)

            axes[j].imshow(img)
            axes[j].set_xlabel(human_kernel_name)

        # save to numpy
        npz_output = {}
        npz_output['kernels'] = KERNEL_NAMES
        for j in range(len(KERNEL_NAMES)):
            npz_output['img_%d' % j] = outputs[j]
        np.savez(os.path.join(OUTPUT_IMAGE_PATH, "raw.npz"), **npz_output)

        plt.subplots_adjust(left=0.03,
                            bottom=0.05,
                            right=0.99,
                            top=0.97,
                            wspace=0.20,
                            hspace=0.23)
        plt.show()

    elif mode == "measure":
        summed_times = [0] * len(KERNEL_NAMES_MEASURE)

        pyrenderer.reload_kernels(enableDebugging=False,
                                  enableInstrumentation=False,
                                  otherPreprocessorArguments=[
                                      "-DKERNEL_USE_DOUBLE=%s" %
                                      ("1" if OUTPUT_STATS_USE_DOUBLE else "0")
                                  ])
        # allocate output for baseline
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        # baseline image per frame, filled while rendering kernel 0
        gt_outputs = [None] * TIMING_STEPS

        # render and write output
        with open(
                OUTPUT_STATS_ALL %
            ("double" if OUTPUT_STATS_USE_DOUBLE else "float"), "w") as f:
            f.write(
                "Kernel Name\tFrame\tTime (ms)\tNum False Positives\tNum False Negatives\tMean Abs Depth Error\tVar Abs Depth Error\n"
            )
            for j, (kernel_name, human_kernel_name,
                    stepsize) in enumerate(KERNEL_NAMES_MEASURE):
                print("Render", kernel_name, stepsize)
                rendererArgs.stepsize = stepsize
                for k in range(TIMING_STEPS):
                    camera.yaw = k * 360 / TIMING_STEPS
                    camera.update_render_args(rendererArgs)
                    timer.start()
                    pyrenderer.render(kernel_name, volume, rendererArgs,
                                      output)
                    timer.stop()
                    out_img = np.array(output.copy_to_cpu())
                    if j == 0:  # baseline
                        gt_outputs[k] = out_img
                        falsePositives = 0
                        falseNegatives = 0
                        meanDepthError = 0
                        varDepthError = 0
                    else:
                        # compute false positives and negatives
                        # (presumably channel 0 is the hit mask and channel 4
                        # the depth -- confirm against the renderer output)
                        falsePositives = np.sum(
                            (gt_outputs[k][:, :, 0] < out_img[:, :, 0]) * 1.0)
                        falseNegatives = np.sum(
                            (gt_outputs[k][:, :, 0] > out_img[:, :, 0]) * 1.0)
                        # compute mean depth error over the pixels that are
                        # positive in channel 0 of both images
                        bothHit = (out_img[:, :, 0] > 0) & (
                            gt_outputs[k][:, :, 0] > 0)
                        # numpy.ma treats True in the mask as "exclude this
                        # element", hence the selection has to be inverted
                        depthDiff = np.ma.masked_array(
                            np.abs(gt_outputs[k][:, :, 4] - out_img[:, :, 4]),
                            mask=~bothHit)
                        meanDepthError = depthDiff.mean()
                        varDepthError = depthDiff.var()
                    t = timer.elapsed_ms()
                    summed_times[j] += t
                    f.write("%s\t%d\t%.4f\t%d\t%d\t%.4f\t%.4f\n" %
                            (human_kernel_name.replace(
                                "\n", " "), k, t, falsePositives,
                             falseNegatives, meanDepthError, varDepthError))

        # write average stats
        with open(
                OUTPUT_STATS_AVG %
            ("double" if OUTPUT_STATS_USE_DOUBLE else "float"), "w") as f:
            f.write("Kernel Name\tAvg-Time (ms)\n")
            for j, (_, human_kernel_name,
                    _) in enumerate(KERNEL_NAMES_MEASURE):
                f.write("%s\t%.4f\n" % (human_kernel_name.replace(
                    "\n", " "), summed_times[j] / TIMING_STEPS))

    elif mode == "instrumentation":
        # recompile with instrumentation
        pyrenderer.reload_kernels(enableInstrumentation=True)

        # allocate output
        output = pyrenderer.allocate_output(rendererArgs.width,
                                            rendererArgs.height,
                                            rendererArgs.render_mode)
        outputs = [None] * len(KERNEL_NAMES)

        # column names, in the order of the values written per kernel below
        fields = [
            "densityFetches-avg", "densityFetches-std", "ddaSteps-avg",
            "ddaSteps-std", "intervalEval-avg", "intervalEval-std",
            "intervalStep-avg", "intervalStep-std", "intervalMaxStep",
            "timeDensityFetch-avg", "timeDensityFetch-std",
            "timePolynomialCreation-avg", "timePolynomialCreation-std",
            "timePolynomialSolution-avg", "timePolynomialSolution-std",
            "timeTotal-avg", "timeTotal-std"
        ]
        # render
        with open(OUTPUT_INSTRUMENTATION, "w") as f:
            f.write("Kernel Name\t%s\n" % "\t".join(fields))
            camera.update_render_args(rendererArgs)
            for j, (kernel_name, human_kernel_name,
                    stepsize) in enumerate(KERNEL_NAMES):
                print("Render", kernel_name, stepsize)
                rendererArgs.stepsize = stepsize
                # NOTE(review): the result is indexed by field name below;
                # confirm that render_with_instrumentation returns the
                # instrumentation alone (and not a tuple) in this version
                instrumentations = \
                    pyrenderer.render_with_instrumentation(
                        kernel_name, volume, rendererArgs, output)
                densityFetches = avg_and_std(
                    instrumentations["densityFetches"])
                ddaSteps = avg_and_std(instrumentations["ddaSteps"])
                intervalEval = avg_and_std(instrumentations["intervalEval"])
                intervalStep = weighted_avg_and_std(
                    instrumentations["intervalStep"],
                    instrumentations["intervalEval"])
                intervalMaxStep = np.max(instrumentations["intervalMaxStep"])
                timeDensityFetch = weighted_avg_and_std(
                    instrumentations["timeDensityFetch"],
                    instrumentations["timeDensityFetch_NumSamples"])
                timePolynomialCreation = weighted_avg_and_std(
                    instrumentations["timePolynomialCreation"],
                    instrumentations["timePolynomialCreation_NumSamples"])
                timePolynomialSolution = weighted_avg_and_std(
                    instrumentations["timePolynomialSolution"],
                    instrumentations["timePolynomialSolution_NumSamples"])
                timeTotal = avg_and_std(instrumentations["timeTotal"])
                f.write(
                    "%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%d\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n"
                    % (human_kernel_name.replace("\n", " "), densityFetches[0],
                       densityFetches[1], ddaSteps[0], ddaSteps[1],
                       intervalEval[0], intervalEval[1], intervalStep[0],
                       intervalStep[1], intervalMaxStep, timeDensityFetch[0],
                       timeDensityFetch[1], timePolynomialCreation[0],
                       timePolynomialCreation[1], timePolynomialSolution[0],
                       timePolynomialSolution[1], timeTotal[0], timeTotal[1]))
                # flush after every kernel so partial results survive a crash
                f.flush()