def load_images(burst_path):
    """Load a burst of raw DNG frames and the metadata needed to process them.

    Frames must be named ``payload_N000.dng``, ``payload_N001.dng``, ... inside
    ``burst_path``. Scanning stops at the first missing index and considers at
    most 100 frames.

    Args:
        burst_path: Directory that contains the burst frames.

    Returns:
        A tuple ``(result, ref_img, white_balance_r, white_balance_g0,
        white_balance_g1, white_balance_b, black_point, white_point,
        cfa_pattern, ccm)``. ``result`` is a 3-D ``hl.Buffer`` of ``UInt(16)``
        with one frame per slice along dimension 2, ``ref_img`` is the
        rawpy-postprocessed first frame, and the remaining values are derived
        from the first frame's metadata.

    Raises:
        ValueError: If ``payload_N000.dng`` is not present in ``burst_path``.
        AssertionError: If fewer than two frames were loaded.
    """
    print(f'\n{"=" * 30}\nLoading images...\n{"=" * 30}')
    start = datetime.utcnow()

    # Collect consecutive frame paths, stopping at the first gap.
    paths = []
    for i in range(100):
        file_path = f'{burst_path}/payload_N{i:03d}.dng'
        if not os.path.isfile(file_path):
            if i == 0:
                raise ValueError("Burst format not recognized.")
            break
        paths.append(file_path)

    # Load raw images
    print('Loading raw images...')
    # Leave one core free, but never ask for fewer than one worker: on a
    # single-core host cpu_count() - 1 is 0, which Pool() rejects.
    worker_count = max(1, min(multiprocessing.cpu_count() - 1, len(paths)))
    # The context manager guarantees the worker processes are released.
    with multiprocessing.Pool(worker_count) as pool:
        images = [hl.Buffer(image) for image in pool.imap(load_image, paths)]
    assert len(images) >= 2, "Burst must consist of at least 2 images"

    # Get a reference image to compare results
    print('Getting reference image...')
    with rawpy.imread(paths[0]) as raw:
        white_balance = raw.camera_whitebalance
        print('white balance', white_balance)
        # Normalise the red and blue gains by the (first) green gain.
        white_balance_r = white_balance[0] / white_balance[1]
        white_balance_g0 = 1
        white_balance_g1 = 1
        white_balance_b = white_balance[2] / white_balance[1]
        cfa_pattern = decode_pattern(raw.raw_pattern)
        ccm = raw.color_matrix
        black_point = int(raw.black_level_per_channel[0])
        white_point = int(raw.white_level)
        ref_img = raw.postprocess(output_bps=16)

    print('Building image buffer...')
    result = hl.Buffer(
        hl.UInt(16),
        [images[0].width(), images[0].height(), len(images)])
    for index, image in enumerate(images):
        # Each frame is copied into its own slice along dimension 2.
        result.sliced(2, index).copy_from(image)

    print(f'Loading finished in {time_diff(start)} ms.\n')

    return (result, ref_img, white_balance_r, white_balance_g0,
            white_balance_g1, white_balance_b, black_point, white_point,
            cfa_pattern, ccm)
def main():
    """Benchmark MyPipeline on the CPU and, if OpenCL is found, on the GPU.

    The CPU result is kept as the reference that the GPU output is checked
    against.

    Returns:
        0 in all cases.
    """
    # Load the input image that sits next to the tutorial sources.
    here = os.path.dirname(__file__)
    source = hl.Buffer(imread(os.path.join(here, "../../tutorial/images/rgb.png")))

    # Buffer that receives the CPU output and serves as the correct reference.
    extents = [source.width(), source.height(), source.channels()]
    reference_output = hl.Buffer(hl.UInt(8), extents)

    print("Testing performance on CPU:")
    cpu_pipeline = MyPipeline(source)
    cpu_pipeline.schedule_for_cpu()
    cpu_pipeline.test_performance()
    cpu_pipeline.curved.realize(reference_output)

    if not have_opencl():
        print("Not testing performance on GPU, "
              "because I can't find the opencl library")
        return 0

    print("Testing performance on GPU:")
    gpu_pipeline = MyPipeline(source)
    gpu_pipeline.schedule_for_gpu()
    gpu_pipeline.test_performance()
    gpu_pipeline.test_correctness(reference_output)
    return 0
def filter_test_image(bilateral_grid, input):
    """JIT-compile ``bilateral_grid``, run it on the test image and save the result.

    Args:
        bilateral_grid: Pipeline object exposing ``compile_jit()`` and
            ``realize(buffer)``.
        input: Image parameter read by the pipeline; it is bound to the test
            image here through ``input.set()``.

    Side effects:
        Writes ``bilateral_grid_input.png`` (copy of the input data) and
        ``bilateral_grid.png`` (filtered output) to the current directory.
    """
    bilateral_grid.compile_jit()

    # Prepare input and output memory buffers (numpy ndarrays).
    input_data = get_input_data()
    input_image = hl.Buffer(input_data)
    input.set(input_image)

    # The output array matches the input's shape and dtype, in Fortran order.
    output_data = np.empty(input_data.shape, dtype=input_data.dtype, order="F")
    output_image = hl.Buffer(output_data)

    # Do the actual computation; output_image wraps output_data, which is
    # what gets saved below.
    bilateral_grid.realize(output_image)

    # Save results.
    input_path = "bilateral_grid_input.png"
    output_path = "bilateral_grid.png"
    imageio.imsave(input_path, input_data)
    imageio.imsave(output_path, output_data)

    print("\nbilateral_grid realized on output_image.")
    print("Result saved at '", output_path,
          "' ( input data copy at '", input_path, "' ).", sep="")
def main():
    """Run MyPipeline on CPU and GPU, check GPU correctness, then time both.

    GPU steps are skipped when ``schedule_for_gpu()`` reports that no GPU
    target is available.

    Returns:
        0 in all cases.
    """
    # Load the input image that sits next to the tutorial sources.
    here = os.path.dirname(__file__)
    picture_path = os.path.join(here, "../../tutorial/images/rgb.png")
    source = hl.Buffer(imageio.imread(picture_path))

    # Buffer that receives the CPU output and serves as the correct reference.
    extents = [source.width(), source.height(), source.channels()]
    reference_output = hl.Buffer(hl.UInt(8), extents)

    print("Running pipeline on CPU:")
    cpu_pipeline = MyPipeline(source)
    cpu_pipeline.schedule_for_cpu()
    cpu_pipeline.curved.realize(reference_output)

    print("Running pipeline on GPU:")
    gpu_pipeline = MyPipeline(source)
    has_gpu_target = gpu_pipeline.schedule_for_gpu()
    if not has_gpu_target:
        print("No GPU target available on the host")
    else:
        print("Testing GPU correctness:")
        gpu_pipeline.test_correctness(reference_output)

    print("Testing performance on CPU:")
    cpu_pipeline.test_performance()

    if has_gpu_target:
        print("Testing performance on GPU:")
        gpu_pipeline.test_performance()

    return 0
def main():
    """Build the erode pipeline, run it on the test image and save the result.

    Side effects:
        Writes ``erode_input.png`` and ``erode_result.png`` to the current
        directory and prints progress information.
    """
    # Define and JIT-compile the pipeline.
    input = hl.ImageParam(hl.UInt(8), 3, "input")
    erode = get_erode(input)
    erode.compile_jit()

    # Bind the test image to the pipeline's image parameter.
    pixels_in = get_input_data()
    buffer_in = hl.Buffer(pixels_in)
    input.set(buffer_in)

    # Output array with the input's shape and dtype, in Fortran order.
    pixels_out = np.empty(pixels_in.shape, dtype=pixels_in.dtype, order="F")
    buffer_out = hl.Buffer(pixels_out)

    print("input_image", buffer_in)
    print("output_image", buffer_out)

    # Run the pipeline; buffer_out wraps pixels_out, which is saved below.
    erode.realize(buffer_out)

    # Save the input copy and the result.
    input_path, output_path = "erode_input.png", "erode_result.png"
    imageio.imsave(input_path, pixels_in)
    imageio.imsave(output_path, pixels_out)
    print("\nerode realized on output image.",
          "Result saved at", output_path,
          "( input data copy at", input_path, ")")

    print("\nEnd of game. Have a nice day!")
def test_nobuildmethod():
    """Check that ``nobuildmethod.generate`` yields a Func that can be realized.

    A 2x2 float buffer filled with 123 is passed in together with the value
    1.0; the realized 2x2 int32 output is expected to be 123 everywhere.
    """
    target = hl.get_jit_target_from_environment()

    b_in = hl.Buffer(hl.Float(32), [2, 2])
    b_in.fill(123)

    b_out = hl.Buffer(hl.Int(32), [2, 2])

    f = nobuildmethod.generate(target, b_in, 1.0)
    f.realize(b_out)

    assert b_out.all_equal(123)
def test_bufferinfo_sharing():
    """Stress-test sharing and lifetime of a huge buffer-protocol allocation.

    A 20000 x 30000 int32 array is wrapped in a Buffer and then re-wrapped
    200 times with a garbage collection after every step. The intent is that
    the data is shared rather than copied and stays alive although the
    original ndarray name has been deleted.
    """
    source = np.ones((20000, 30000), dtype=np.int32)
    shared = hl.Buffer(source)
    # From here on only the Buffer chain references the allocation.
    del source

    for _ in range(200):
        # Rebinding drops the previous wrapper, leaving only the newest one.
        shared = hl.Buffer(shared)
        gc.collect()

    # The storage must still be writable and readable through the last wrapper.
    shared[56, 34] = 12
    assert shared[56, 34] == 12
def test_partialbuildmethod():
    """Check that ``partialbuildmethod.generate`` is rejected with a RuntimeError.

    The error text must state that build()-style generators are not supported
    in the Python bindings; the test fails if no exception is raised.
    """
    target = hl.get_jit_target_from_environment()

    b_in = hl.Buffer(hl.Float(32), [2, 2])
    b_in.fill(123)

    try:
        partialbuildmethod.generate(target, b_in, 1)
    except RuntimeError as e:
        assert "Generators that use build() (instead of generate()+Output<>) are not supported in the Python bindings." in str(e)
    else:
        assert False, 'Did not see expected exception!'
def filter_test_image(local_laplacian, input):
    """JIT-compile ``local_laplacian``, run it on the test image and save the result.

    Args:
        local_laplacian: Pipeline object exposing ``compile_jit(target)`` and
            ``realize(width, height, channels)``.
        input: Image parameter read by the pipeline; it is bound to the test
            image here through ``input.set()``.

    Side effects:
        Writes ``local_laplacian_input.png`` and ``local_laplacian.png`` to
        the current directory.
    """
    local_laplacian.compile_jit(hl.get_target_from_environment())

    # Bind the test image to the pipeline's image parameter.
    input_data = get_input_data()
    input_image = hl.Buffer(input_data)
    input.set(input_image)

    # Do the actual computation; realize() allocates the output itself, so no
    # output array needs to be pre-allocated here.
    input_width, input_height = input_data.shape[:2]
    output_image = local_laplacian.realize(input_width, input_height, 3)
    output_data = np.asanyarray(output_image)

    # Convert back to uint8 by dropping the low 8 bits.
    # NOTE(review): assumes both arrays hold 16-bit samples -- confirm against
    # get_input_data().
    input_data = (input_data >> 8).astype(np.uint8)
    output_data = (output_data >> 8).astype(np.uint8)

    # Save results.
    input_path = "local_laplacian_input.png"
    output_path = "local_laplacian.png"
    imageio.imsave(input_path, input_data)
    imageio.imsave(output_path, output_data)

    print()
    print("local_laplacian realized on output_image.")
    print('Result saved at {} (input data copy at {}).'.format(
        output_path, input_path))
def test_for_each_element():
    """Fill a 3x4 float buffer with x + y and verify it via ``for_each_element``."""
    width, height = 3, 4
    buf = hl.Buffer(hl.Float(32), [width, height])
    for col in range(width):
        for row in range(height):
            buf[col, row] = col + row

    # The check is delegated to _assert_fn; the buffer is bound as a default
    # argument so the callback only needs the position.
    def check_position(pos, buf=buf):
        return _assert_fn(buf[pos[0], pos[1]] == pos[0] + pos[1])

    buf.for_each_element(check_position)
def _make_constant_image():
    """Return a 32x32x3 UInt(8) buffer named 'constant_image' holding x + y + c."""
    side, channels = 32, 3
    image = hl.Buffer(hl.UInt(8), [side, side, channels], 'constant_image')
    coordinates = (
        (x, y, c)
        for x in range(side)
        for y in range(side)
        for c in range(channels)
    )
    for x, y, c in coordinates:
        image[x, y, c] = x + y + c
    return image
def main():
    """Build the interpolate pipeline, time one realization and save the result.

    Side effects:
        Writes ``interpolate_input.png`` and ``interpolate_result.png`` to the
        current directory and prints the elapsed time.

    Raises:
        AssertionError: If the input data does not have 4 channels.
    """
    input = hl.ImageParam(float_t, 3, "input")
    levels = 10

    interpolate = get_interpolate(input, levels)

    # Prepare input and output memory buffers (numpy ndarrays).
    input_data = get_input_data()
    assert input_data.shape[2] == 4
    input_image = hl.Buffer(input_data)
    input.set(input_image)

    input_width, input_height = input_data.shape[:2]

    # Time a single realization of the 3-channel output.
    t0 = datetime.now()
    output_image = interpolate.realize(input_width, input_height, 3)
    t1 = datetime.now()
    print('Interpolated in %.5f secs' % (t1 - t0).total_seconds())

    output_data = hl.buffer_to_ndarray(output_image)

    # Save results.
    input_path = "interpolate_input.png"
    output_path = "interpolate_result.png"
    imsave(input_path, input_data)
    imsave(output_path, output_data)
    # The message used to say "blur", which names a different pipeline.
    print("\ninterpolate realized on output image.",
          "Result saved at", output_path,
          "( input data copy at", input_path, ")")

    print("\nEnd of game. Have a nice day!")
def gauss_15x15(input, name):
    """Apply ``gauss`` to ``input`` with a fixed 15-tap symmetric kernel.

    Args:
        input: Forwarded unchanged to ``gauss``.
        name: Forwarded unchanged to ``gauss``.

    Returns:
        Whatever ``gauss(input, kernel, rdom, name)`` returns.
    """
    print(' gauss_15x15')

    radius = 7
    taps = 2 * radius + 1
    # Weights for offsets 0..7; the kernel is mirrored for negative offsets.
    half_kernel = (
        0.149464,
        0.139431,
        0.113193,
        0.079968,
        0.049165,
        0.026304,
        0.012246,
        0.004961,
    )

    # Translate the buffer so that it is indexed from -7 to 7.
    k = hl.Buffer(hl.Float(32), [taps], "gauss_15x15")
    k.translate([-radius])

    rdom = hl.RDom([(-radius, taps)])

    k.fill(0)
    for offset, weight in enumerate(half_kernel):
        k[-offset] = weight
        k[offset] = weight

    return gauss(input, k, rdom, name)
def test_ndarray_to_buffer():
    """Check that a Buffer built from an ndarray mirrors its layout and shares its data."""
    rows, cols = 200, 300
    a0 = np.ones((rows, cols), dtype=np.int32)

    # The buffer keeps the shape of the data source. For this default-ordered
    # ndarray that means dim 0 has stride 300 and dim 1 has stride 1.
    b0 = hl.Buffer(a0, "float32_test_buffer")
    assert b0.type() == hl.Int(32)
    assert b0.name() == "float32_test_buffer"
    assert b0.all_equal(1)

    # (extent, stride) expected for each dimension.
    layout = ((rows, cols), (cols, 1))
    for axis, (extent, stride) in enumerate(layout):
        dim = b0.dim(axis)
        assert dim.min() == 0
        assert dim.max() == extent - 1
        assert dim.extent() == extent
        assert dim.stride() == stride

    # Writes through either object must be visible through the other.
    a0[12, 34] = 56
    assert b0[12, 34] == 56

    b0[56, 34] = 12
    assert a0[56, 34] == 12
def test_interleaved_ndarray():
    """Check that a Buffer built from an interleaved uint8 ndarray keeps its strides."""
    w, h, c = 7, 13, 3
    shape = (w, h, c)
    # The channel axis is innermost (stride 1), then x (stride c), then y.
    strides = (c, w * c, 1)

    a = np.ndarray(dtype=np.uint8, shape=shape, strides=strides)
    assert a.shape == shape
    assert a.strides == strides
    assert a.dtype == np.uint8

    b = hl.Buffer(a)
    assert b.type() == hl.UInt(8)

    # Every dimension starts at 0 and reports the ndarray's extent and stride.
    for axis, (extent, stride) in enumerate(zip(shape, strides)):
        dim = b.dim(axis)
        assert dim.min() == 0
        assert dim.extent() == extent
        assert dim.stride() == stride
def test_performance(self):
    """Time the scheduled pipeline and print the best average per-run time.

    The pipeline is realized once as a warm-up, then three trials of 100
    realizations each are timed. The fastest trial's average time per
    realization is printed in milliseconds.
    """
    runs_per_trial = 100
    trials = 3

    output = hl.Buffer(
        hl.UInt(8),
        [self.input.width(), self.input.height(), self.input.channels()])

    # Run the filter once to initialize any GPU runtime state.
    self.curved.realize(output)

    # Now take the best of the trials for timing.
    best_time = float("inf")
    for _ in range(trials):
        t1 = datetime.now()

        for _ in range(runs_per_trial):
            self.curved.realize(output)

        # Force any GPU code to finish by copying the buffer back to the CPU.
        output.copy_to_host()

        t2 = datetime.now()

        # Average over the runs so the printed figure is the time of ONE
        # realization, not of the whole batch.
        elapsed = (t2 - t1).total_seconds() / runs_per_trial
        best_time = min(best_time, elapsed)

    print("%1.4f milliseconds" % (best_time * 1000))
def _evaluate(e):
    """Evaluate expression ``e`` and return its value.

    The expression is realized into a one-element buffer through a temporary
    one-dimensional Func, and element 0 is returned.
    """
    # TODO: support zero-dim Func, Buffers
    result = hl.Buffer(e.type(), 1)
    func = hl.Func()
    var = hl.Var()
    func[var] = e
    func.realize(result)
    return result(0)
def _realize_and_check(f, offset=0):
    """Realize ``f`` into a 2x2 float buffer and check all four values.

    Args:
        f: Object with a ``realize(buffer)`` method.
        offset: Amount added to every expected value (default 0).

    Raises:
        AssertionError: If any element differs from its expected value.
    """
    out = hl.Buffer(hl.Float(32), [2, 2])
    f.realize(out)

    # ((x, y), base value); each element must equal base + offset + 123.
    expectations = (
        ((0, 0), 3.5),
        ((0, 1), 4.5),
        ((1, 0), 4.5),
        ((1, 1), 5.5),
    )
    for (i, j), base in expectations:
        assert out[i, j] == base + offset + 123
def test_overflow():
    """Check the INT_MAX size boundary when wrapping an ndarray in a Buffer.

    An array of exactly INT_MAX bytes must be accepted; one byte more must be
    rejected with a ValueError that mentions ``make_dim_vec``.
    """
    # size = INT_MAX
    w_intmax = 0x7FFFFFFF

    # When size == INT_MAX, we should not emit error
    size_intmax = np.ndarray(dtype=np.uint8, shape=(w_intmax))
    hl.Buffer(size_intmax)

    # size = INT_MAX + 1
    w_over_intmax = 0x7FFFFFFF + 1

    # We should emit the error when the size > INT_MAX
    size_over_intmax = np.ndarray(dtype=np.uint8, shape=(w_over_intmax))
    try:
        hl.Buffer(size_over_intmax)
    except ValueError as e:
        assert 'Out of range arguments to make_dim_vec.' in str(e)
    else:
        # Without this branch the test would pass silently when the oversized
        # array is accepted.
        assert False, 'Did not see expected exception!'
def _evaluate(e):
    """Evaluate expression ``e`` and return its value.

    The expression is realized into a one-element buffer through a temporary
    one-dimensional Func, and element 0 is returned.
    """
    # TODO: support zero-dim Func, Buffers
    result = hl.Buffer(type=e.type(), sizes=[1])
    func = hl.Func()
    var = hl.Var()
    func[var] = e
    func.realize(result)
    return result[0]
def test_reorder():
    """Check extents and strides of buffers built with an explicit storage order.

    Also checks that a default-ordered buffer, once transposed with the same
    permutation, matches the explicitly ordered one dimension by dimension.
    """
    w, h, c, z = 7, 5, 3, 2

    # 3-D case, using keyword arguments.
    a = hl.Buffer(type=hl.UInt(8), sizes=[w, h, c], storage_order=[2, 0, 1])
    for axis, extent in enumerate((w, h, c)):
        assert a.dim(axis).extent() == extent
    for axis, stride in ((2, 1), (0, c), (1, w * c)):
        assert a.dim(axis).stride() == stride

    # 4-D case, using positional arguments.
    order = [2, 3, 0, 1]
    b = hl.Buffer(hl.UInt(8), [w, h, c, z], order)
    for axis, extent in enumerate((w, h, c, z)):
        assert b.dim(axis).extent() == extent
    for axis, stride in ((2, 1), (3, c), (0, c * z), (1, w * c * z)):
        assert b.dim(axis).stride() == stride

    # Before transposing, dimension i of b corresponds to dimension order[i]
    # of the default-ordered b2.
    b2 = hl.Buffer(hl.UInt(8), [c, z, w, h])
    for b_axis, b2_axis in enumerate(order):
        assert b.dim(b_axis).extent() == b2.dim(b2_axis).extent()
    for b_axis, b2_axis in enumerate(order):
        assert b.dim(b_axis).stride() == b2.dim(b2_axis).stride()

    # After transposing, the two buffers agree dimension by dimension.
    b2.transpose([2, 3, 0, 1])
    for axis in range(4):
        assert b.dim(axis).extent() == b2.dim(axis).extent()
    for axis in range(4):
        assert b.dim(axis).stride() == b2.dim(axis).stride()
def test_bufferinfo_sharing():
    """Stress-test sharing and lifetime of a huge buffer-protocol allocation.

    Skipped when ``_is_64bits()`` is false, because the array is too large
    there. Otherwise a 20000 x 30000 int32 array is wrapped in a Buffer and
    re-wrapped 200 times with a garbage collection after every step. The
    intent is that the data is shared rather than copied and stays alive
    although the original ndarray name has been deleted.
    """
    if not _is_64bits():
        print("skipping test_bufferinfo_sharing()")
        return

    source = np.ones((20000, 30000), dtype=np.int32)
    shared = hl.Buffer(source)
    # From here on only the Buffer chain references the allocation.
    del source

    for _ in range(200):
        # Rebinding drops the previous wrapper, leaving only the newest one.
        shared = hl.Buffer(shared)
        gc.collect()

    # The storage must still be writable and readable through the last wrapper.
    shared[56, 34] = 12
    assert shared[56, 34] == 12
def test_all(vector_width, target):
    """Exercise every boundary condition over several ways of giving bounds.

    Each boundary condition is applied to a Func with explicit bounds, a
    Buffer with explicit bounds, a Buffer with undefined bounds in x, a
    Buffer with undefined bounds in y, and a Buffer with implicit bounds.
    Every result goes through ``realize_and_check``.

    Relies on ``x``, ``y``, ``test_min``, ``test_extent``, ``test_exterior``
    and the ``check_*`` functions from the enclosing module.

    Args:
        vector_width: Forwarded to ``realize_and_check``.
        target: Forwarded to ``realize_and_check``.
    """
    W = 32
    H = 32

    # Source image holding a wrapping ramp: (x + y * W) mod 256.
    source = hl.Buffer(hl.UInt(8), [W, H])
    for row in range(H):
        for col in range(W):
            source[col, row] = (col + row * W) & 0xff

    source_func = hl.Func()
    source_func[x, y] = source[x, y]

    cases = [
        (hl.BoundaryConditions.constant_exterior, check_constant_exterior),
        (hl.BoundaryConditions.repeat_edge, check_repeat_edge),
        (hl.BoundaryConditions.repeat_image, check_repeat_image),
        (hl.BoundaryConditions.mirror_image, check_mirror_image),
        (hl.BoundaryConditions.mirror_interior, check_mirror_interior),
    ]

    default_region = (test_min, test_extent, test_min, test_extent)

    for bc, checker in cases:
        # (keyword arguments for bc, (min_x, extent_x, min_y, extent_y) to check)
        plans = [
            # Func input with explicit bounds.
            ({'f': source_func, 'bounds': [(0, W), (0, H)]}, default_region),
            # Buffer input with explicit bounds.
            ({'f': source, 'bounds': [(0, W), (0, H)]}, default_region),
            # Undefined bounds in x: only check inside the image in x.
            ({'f': source, 'bounds': [(hl.Expr(), hl.Expr()), (0, H)]},
             (0, W, test_min, test_extent)),
            # Undefined bounds in y: only check inside the image in y.
            ({'f': source, 'bounds': [(0, W), (hl.Expr(), hl.Expr())]},
             (test_min, test_extent, 0, H)),
            # Bounds taken implicitly from the Buffer.
            ({'f': source}, default_region),
        ]

        # constant_exterior additionally needs the exterior value.
        if bc == hl.BoundaryConditions.constant_exterior:
            for kwargs, _ in plans:
                kwargs['exterior'] = test_exterior

        for kwargs, region in plans:
            realize_and_check(bc(**kwargs), checker, source,
                              *region, vector_width, target)
def realize_and_check(f, checker, input, test_min_x, test_extent_x, test_min_y, test_extent_y, vector_width, target):
    """Realize ``f`` over a rectangle and run ``checker`` on every pixel.

    Relies on ``x``, ``y`` and ``schedule_test`` from the enclosing module.

    Args:
        f: Object indexable as ``f[x, y]``.
        checker: Callable invoked as ``checker(input, result, col, row)``.
        input: Passed through to ``checker``.
        test_min_x: First x coordinate of the rectangle.
        test_extent_x: Width of the rectangle.
        test_min_y: First y coordinate of the rectangle.
        test_extent_y: Height of the rectangle.
        vector_width: Forwarded to ``schedule_test``.
        target: Forwarded to ``schedule_test`` and ``realize``.
    """
    # Output buffer covering exactly the requested rectangle.
    result = hl.Buffer(hl.UInt(8), [test_extent_x, test_extent_y])
    result.set_min([test_min_x, test_min_y])

    wrapped = hl.lambda_func(x, y, f[x, y])
    schedule_test(wrapped, vector_width, target)
    wrapped.realize(result, target)
    result.copy_to_host()

    columns = range(test_min_x, test_min_x + test_extent_x)
    rows = range(test_min_y, test_min_y + test_extent_y)
    for row in rows:
        for col in columns:
            checker(input, result, col, row)
def test_print_when():
    """Check that ``hl.print_when`` prints only for the element where x == 3."""
    x = hl.Var('x')
    f = hl.Func('f')
    condition = x == 3
    squared = hl.cast(hl.UInt(8), x * x)
    f[x] = hl.print_when(condition, squared, 'is result at', x)

    buf = hl.Buffer(hl.UInt(8), [10])

    # Capture what the pipeline prints while it is realized.
    captured = StringIO()
    with _redirect_stdout(captured):
        f.realize(buf)
    actual = captured.getvalue()

    expected = '9 is result at 3\n'
    assert expected == actual, "Expected: %s, Actual: %s" % (expected, actual)
def test_compiletime_error():
    """Check that realizing a uint16 Func into a uint8 buffer raises RuntimeError."""
    x = hl.Var('x')
    y = hl.Var('y')
    f = hl.Func('f')
    f[x, y] = hl.u16(x + y)

    # Deliberate type-mismatch error: the buffer is uint8, the Func is uint16.
    buf = hl.Buffer(hl.UInt(8), [2, 2])

    caught = None
    try:
        f.realize(buf)
    except RuntimeError as err:
        caught = err

    assert caught is not None, 'Did not see expected exception!'
    assert 'Output buffer f has type uint16 but type of the buffer passed in is uint8' in str(caught)
def test_print_expr():
    """Check the text produced by ``hl.print`` with mixed string and numeric arguments."""
    x = hl.Var('x')
    f = hl.Func('f')
    value = hl.cast(hl.UInt(8), x)
    f[x] = hl.print(value, 'is what', 'the', 1, 'and', 3.1415, 'saw')

    buf = hl.Buffer(hl.UInt(8), 1)

    # Capture what the pipeline prints while it is realized.
    captured = StringIO()
    with _redirect_stdout(captured):
        f.realize(buf)
    actual = captured.getvalue()

    expected = '0 is what the 1 and 3.141500 saw\n'
    assert expected == actual, "Expected: %s, Actual: %s" % (expected, actual)
def test_compiletime_error():
    """Check that realizing a uint16 Func into a uint8 buffer raises RuntimeError."""
    x = hl.Var('x')
    y = hl.Var('y')
    f = hl.Func('f')
    f[x, y] = hl.cast(hl.UInt(16), x + y)

    # Deliberate type-mismatch error: the buffer is uint8, the Func is uint16.
    buf = hl.Buffer(hl.UInt(8), [2, 2])

    caught = None
    try:
        f.realize(buf)
    except RuntimeError as err:
        caught = err

    assert caught is not None, 'Did not see expected exception!'
    assert 'Buffer has type uint8, but Func "f" has type uint16.' in str(caught)
def test_runtime_error():
    """Check that realizing beyond a Func's declared bound raises RuntimeError."""
    x = hl.Var('x')
    f = hl.Func('f')
    f[x] = hl.u8(x)
    f.bound(x, 0, 1)

    # Deliberate runtime error: the buffer has 10 elements while x is bound
    # with min 0 and extent 1.
    buf = hl.Buffer(hl.UInt(8), [10])

    caught = None
    try:
        f.realize(buf)
    except RuntimeError as err:
        caught = err

    assert caught is not None, 'Did not see expected exception!'
    assert 'do not cover required region' in str(caught)
def test_overflow():
    """Check the INT_MAX size boundary when wrapping an ndarray in a Buffer.

    Skipped when ``_is_64bits()`` is false, because the arrays are too large
    there. An array of exactly INT_MAX bytes must be accepted; one byte more
    must be rejected with a ValueError that mentions ``make_dim_vec``.
    """
    # Don't bother testing this on 32-bit systems (our "huge" size is too large there)
    if not _is_64bits():
        print("skipping test_overflow()")
        return

    # size = INT_MAX
    w_intmax = 0x7FFFFFFF

    # When size == INT_MAX, we should not emit error
    size_intmax = np.ndarray(dtype=np.uint8, shape=(w_intmax))
    hl.Buffer(size_intmax)

    # size = INT_MAX + 1
    w_over_intmax = 0x7FFFFFFF + 1

    # We should emit the error when the size > INT_MAX
    size_over_intmax = np.ndarray(dtype=np.uint8, shape=(w_over_intmax))
    try:
        hl.Buffer(size_over_intmax)
    except ValueError as e:
        assert 'Out of range arguments to make_dim_vec.' in str(e)
    else:
        # Without this branch the test would pass silently when the oversized
        # array is accepted.
        assert False, 'Did not see expected exception!'