def combine2(im1, im2, width, height, dist):
    """Blend two single-channel images using weights looked up in ``dist``.

    Both inputs are wrapped in a repeat-edge boundary condition over
    ``[0, width) x [0, height)``. Each pixel value is used as an index into
    ``dist`` to obtain a weight; the two weights are normalized so the masks
    sum to one, and the result is the mask-weighted sum of the two images,
    saturated to unsigned 16 bits.

    Args:
        im1: First image, indexable as ``im1[x, y]``.
        im2: Second image, indexed the same way as ``im1``.
        width: Extent of the valid region along x.
        height: Extent of the valid region along y.
        dist: Lookup indexed by a pixel value, yielding that value's weight.

    Returns:
        The ``hl.Func`` named ``"combine_output"``.
    """
    col, row = hl.Var("x"), hl.Var("y")

    weight_mask1 = hl.Func("mask1_layer_0")
    weight_mask2 = hl.Func("mask2_layer_0")
    blend_sum = hl.Func("combine_accumulator")
    blended = hl.Func("combine_output")

    clamped1 = hl.BoundaryConditions.repeat_edge(im1, [(0, width), (0, height)])
    clamped2 = hl.BoundaryConditions.repeat_edge(im2, [(0, width), (0, height)])

    # The pixel value itself selects the weight from the lookup.
    raw_weight1 = hl.f32(dist[clamped1[col, row]])
    raw_weight2 = hl.f32(dist[clamped2[col, row]])

    # Normalize so that the two masks always add up to one.
    weight_mask1[col, row] = raw_weight1 / (raw_weight1 + raw_weight2)
    weight_mask2[col, row] = 1 - weight_mask1[col, row]

    # Zero-initialized pure definition followed by a single update step.
    blend_sum[col, row] = hl.i32(0)
    blend_sum[col, row] += (
        hl.i32(clamped1[col, row] * weight_mask1[col, row])
        + hl.i32(clamped2[col, row] * weight_mask2[col, row])
    )

    blended[col, row] = hl.u16_sat(blend_sum[col, row])

    # Schedule: materialize the first mask and both accumulator stages.
    weight_mask1.compute_root().parallel(row).vectorize(col, 16)
    blend_sum.compute_root().parallel(row).vectorize(col, 16)
    blend_sum.update(0).parallel(row).vectorize(col, 16)

    return blended
def brighten(input, gain):
    """Scale every pixel of a 2-D image by ``gain`` with 16-bit saturation.

    Args:
        input: Image indexable as ``input[x, y]``.
        gain: Multiplier applied to each pixel.

    Returns:
        The ``hl.Func`` named ``"brighten_output"``.
    """
    col, row = hl.Var("x"), hl.Var("y")
    brightened = hl.Func("brighten_output")

    # Widen to 32 bits before multiplying, then saturate back to u16.
    widened = hl.u32(input[col, row])
    brightened[col, row] = hl.u16_sat(gain * widened)

    return brightened
def black_white_level(input, black_point, white_point):
    """Linearly rescale a 2-D image between a black point and a white point.

    ``black_point`` is subtracted from each pixel and the remainder is scaled
    by ``65535 / (white_point - black_point)``; the result is saturated to
    unsigned 16 bits.

    Args:
        input: Image indexable as ``input[x, y]``.
        black_point: Level that maps to zero.
        white_point: Level that maps to 65535.

    Returns:
        The ``hl.Func`` named ``"black_white_level_output"``.
    """
    col, row = hl.Var("x"), hl.Var("y")
    leveled = hl.Func("black_white_level_output")

    stretch = 65535 / (white_point - black_point)

    # Signed arithmetic so pixels below the black point go negative and are
    # clamped to zero by the saturating cast.
    above_black = hl.i32(input[col, row]) - black_point
    leveled[col, row] = hl.u16_sat(above_black * stretch)

    return leveled
def contrast(input, strength, black_point):
    """Apply a sine-shaped tone curve followed by a black-point rescale.

    The pure definition maps each pixel through
    ``slope * sin(factor * pixel - pi / (2 * strength)) + constant``; an
    update step then subtracts ``black_point`` and rescales by
    ``65535 / (65535 - black_point)``. Both stages saturate to unsigned
    16 bits.

    Args:
        input: Image indexable as ``input[x, y, c]``.
        strength: Divisor controlling the span of the sine argument.
        black_point: Level subtracted in the second stage.

    Returns:
        The ``hl.Func`` named ``"contrast_output"``.
    """
    col, row, chan = hl.Var("x"), hl.Var("y"), hl.Var("c")
    curved = hl.Func("contrast_output")

    # Constants of the sine curve.
    half_phase = math.pi / (2 * strength)
    sin_half_phase = hl.sin(half_phase)
    amplitude = 65535 / (2 * sin_half_phase)
    offset = amplitude * sin_half_phase
    phase_per_count = math.pi / (strength * 65535)

    # Stage 1 (pure definition): sine curve.
    phase = phase_per_count * hl.f32(input[col, row, chan])
    curved[col, row, chan] = hl.u16_sat(amplitude * hl.sin(phase - half_phase) + offset)

    # Stage 2 (update definition): remove the black level and rescale.
    rescale = 65535 / (65535 - black_point)
    curved[col, row, chan] = hl.u16_sat((hl.i32(curved[col, row, chan]) - black_point) * rescale)

    curved.compute_root().parallel(row).vectorize(col, 16)

    return curved
def yuv_to_rgb(input):
    """Convert a three-channel YUV image to RGB with 16-bit saturation.

    Channels 0, 1 and 2 of ``input`` are read as Y, U and V. The output
    channels are::

        R = Y + 1.403 * V
        G = Y - 0.344 * U - 0.714 * V
        B = Y + 1.77 * U

    Args:
        input: Image indexable as ``input[x, y, c]``.

    Returns:
        The ``hl.Func`` named ``"yuv_to_rgb_output"``.
    """
    print(' yuv_to_rgb')

    col, row, chan = hl.Var("x"), hl.Var("y"), hl.Var("c")
    rgb = hl.Func("yuv_to_rgb_output")

    luma = input[col, row, 0]
    chroma_u = input[col, row, 1]
    chroma_v = input[col, row, 2]

    # Pure definition zero-fills every channel; one update per RGB channel
    # then overwrites it.
    rgb[col, row, chan] = hl.u16(0)
    rgb[col, row, 0] = hl.u16_sat(luma + 1.403 * chroma_v)
    rgb[col, row, 1] = hl.u16_sat(luma - 0.344 * chroma_u - 0.714 * chroma_v)
    rgb[col, row, 2] = hl.u16_sat(luma + 1.77 * chroma_u)

    rgb.compute_root().parallel(row).vectorize(col, 16)
    for stage in range(3):
        rgb.update(stage).parallel(row).vectorize(col, 16)

    return rgb
def white_balance(input, width, height, white_balance_r, white_balance_g0, white_balance_g1, white_balance_b):
    """Apply per-site gains to a 2x2-tiled mosaic image.

    The image is processed in 2x2 tiles. Within each tile the gain applied
    depends on the pixel's position:

    * even x, even y -> ``white_balance_r``
    * odd x,  even y -> ``white_balance_g0``
    * even x, odd y  -> ``white_balance_g1``
    * odd x,  odd y  -> ``white_balance_b``

    Each product is saturated to unsigned 16 bits. Pixels not covered by a
    complete tile keep the zero written by the pure definition.

    Args:
        input: Image indexable as ``input[x, y]``.
        width: Image width; ``width // 2`` tiles are processed along x.
        height: Image height; ``height // 2`` tiles are processed along y.
        white_balance_r: Gain for the (even x, even y) site.
        white_balance_g0: Gain for the (odd x, even y) site.
        white_balance_g1: Gain for the (even x, odd y) site.
        white_balance_b: Gain for the (odd x, odd y) site.

    Returns:
        The ``hl.Func`` named ``"white_balance_output"``.
    """
    output = hl.Func("white_balance_output")
    print(width, height, white_balance_r, white_balance_g0, white_balance_g1, white_balance_b)
    x, y = hl.Var("x"), hl.Var("y")

    # One reduction step per 2x2 tile. Floor division keeps the tile counts
    # integral; true division of Python ints would produce a float extent.
    rdom = hl.RDom([(0, width // 2), (0, height // 2)])

    even_x, even_y = rdom.x * 2, rdom.y * 2
    odd_x, odd_y = rdom.x * 2 + 1, rdom.y * 2 + 1

    # (x index, y index, gain) for each site in a tile, in update order.
    tile_sites = (
        (even_x, even_y, white_balance_r),
        (odd_x, even_y, white_balance_g0),
        (even_x, odd_y, white_balance_g1),
        (odd_x, odd_y, white_balance_b),
    )

    output[x, y] = hl.u16(0)
    for site_x, site_y, site_gain in tile_sites:
        output[site_x, site_y] = hl.u16_sat(site_gain * hl.f32(input[site_x, site_y]))

    output.compute_root().parallel(y).vectorize(x, 16)
    for stage in range(len(tile_sites)):
        output.update(stage).parallel(rdom.y)

    return output
def tone_map(input, width, height, compression, gain):
    """Tone-map a three-channel image by iteratively fusing two exposures.

    A grayscale image is computed as the mean of the three channels. For each
    of ``TONE_MAP_PASSES`` passes, the current grayscale is brightened, both
    versions are gamma-corrected and blended with ``combine2``, and the blend
    is converted back with ``gamma_inverse`` and brightened again. The
    per-pass compression and gain start at 1 and grow linearly in steps of
    ``(target - 1) / TONE_MAP_PASSES``. Finally each input channel is scaled
    by the ratio of the processed grayscale to the original grayscale.

    Args:
        input: Image indexable as ``input[x, y, c]`` with three channels.
        width: Image width, forwarded to ``combine2``.
        height: Image height, forwarded to ``combine2``.
        compression: Target value used to derive the per-pass compression.
        gain: Target value used to derive the per-pass gain.

    Returns:
        The ``hl.Func`` named ``"tone_map_output"``.
    """
    print(f'Compression: {compression}, gain: {gain}')

    x, y, c, v = hl.Var("x"), hl.Var("y"), hl.Var("c"), hl.Var("v")
    channels = hl.RDom([(0, 3)])

    luma_weights = hl.Func("luma_weight_distribution")
    gray = hl.Func("grayscale")
    mapped = hl.Func("tone_map_output")

    # Bell-shaped weight over the 16-bit range, centred on 0.5 after
    # normalizing the index by 65535.
    luma_weights[v] = hl.f32(hl.exp(-12.5 * hl.pow(hl.f32(v) / 65535 - 0.5, 2)))

    # Mean of the three channels, summed in 32 bits.
    gray[x, y] = hl.u16(hl.sum(hl.u32(input[x, y, channels])) / 3)

    base_compression = 1
    base_gain = 1
    compression_step = (compression - base_compression) / (TONE_MAP_PASSES)
    gain_step = (gain - base_gain) / (TONE_MAP_PASSES)

    exposure = gray
    for pass_index in range(TONE_MAP_PASSES):
        print(' pass', pass_index)
        pass_compression = pass_index * compression_step + base_compression
        pass_gain = pass_index * gain_step + base_gain

        boosted = brighten(exposure, pass_compression)
        exposure_gamma = gamma_correct(exposure)
        boosted_gamma = gamma_correct(boosted)
        fused_gamma = combine2(exposure_gamma, boosted_gamma, width, height, luma_weights)
        exposure = brighten(gamma_inverse(fused_gamma), pass_gain)

    # Re-apply colour; max(1, ...) avoids dividing by a zero grayscale value.
    mapped[x, y, c] = hl.u16_sat(
        hl.u32(input[x, y, c]) * hl.u32(exposure[x, y]) / hl.u32(hl.max(1, gray[x, y]))
    )

    gray.compute_root().parallel(y).vectorize(x, 16)
    luma_weights.compute_root().vectorize(v, 16)

    return mapped
def srgb(input, ccm):
    """Multiply each pixel's three channels by a 3x3 matrix.

    ``ccm[row][col]`` is stored at ``srgb_matrix[col, row]``, so output
    channel ``c`` is ``sum_k ccm[c][k] * input[x, y, k]``, saturated to
    unsigned 16 bits.

    Args:
        input: Image indexable as ``input[x, y, c]`` with three channels.
        ccm: 3x3 nested sequence of coefficients, indexed ``ccm[row][col]``.

    Returns:
        The ``hl.Func`` named ``"srgb_output"``.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    k = hl.RDom([(0, 3)])

    matrix = hl.Func("srgb_matrix")
    corrected = hl.Func("srgb_output")

    # Zero everywhere, then one update per coefficient, row by row.
    matrix[x, y] = hl.f32(0)
    for row in range(3):
        for col in range(3):
            matrix[col, row] = hl.f32(ccm[row][col])

    corrected[x, y, c] = hl.u16_sat(hl.sum(matrix[k, c] * input[x, y, k]))

    return corrected
def combine(im1, im2, width, height, dist):
    """Blend two single-channel images band by band using blurred masks.

    Per-pixel weights are looked up in ``dist`` and normalized into two masks
    that sum to one. For every layer after the first, the difference between
    the current image and its ``gauss_7x7`` blur (via ``diff``) is weighted
    by the current masks and accumulated; images and masks are then blurred
    for the next layer. The final blurred images, weighted by the final
    masks, are added last and the total is saturated to unsigned 16 bits.

    Args:
        im1: First image, indexable as ``im1[x, y]``.
        im2: Second image, indexed the same way as ``im1``.
        width: Extent of the valid region along x.
        height: Extent of the valid region along y.
        dist: Lookup indexed by a pixel value, yielding that value's weight.

    Returns:
        The ``hl.Func`` named ``"combine_output"``.
    """
    x, y = hl.Var("x"), hl.Var("y")

    base_mask1 = hl.Func("mask1_layer_0")
    base_mask2 = hl.Func("mask2_layer_0")
    total = hl.Func("combine_accumulator")
    fused = hl.Func("combine_output")

    edge1 = hl.BoundaryConditions.repeat_edge(im1, [(0, width), (0, height)])
    edge2 = hl.BoundaryConditions.repeat_edge(im2, [(0, width), (0, height)])

    sharp1, sharp2 = edge1, edge2
    smooth1 = gauss_7x7(edge1, "img1_layer_0")
    smooth2 = gauss_7x7(edge2, "img2_layer_0")

    # The pixel value itself selects the weight; normalize so masks sum to 1.
    raw_weight1 = hl.f32(dist[edge1[x, y]])
    raw_weight2 = hl.f32(dist[edge2[x, y]])
    base_mask1[x, y] = raw_weight1 / (raw_weight1 + raw_weight2)
    base_mask2[x, y] = 1 - base_mask1[x, y]

    blend_mask1, blend_mask2 = base_mask1, base_mask2

    num_layers = 2
    total[x, y] = hl.i32(0)

    for layer in range(1, num_layers):
        print(' layer', layer)

        # NOTE(review): the first name uses the previous layer index and the
        # second the current one; kept as-is because the names are runtime
        # strings.
        detail1 = diff(sharp1, smooth1, f"laplace1_layer_{layer - 1}")
        detail2 = diff(sharp2, smooth2, f"laplace2_layer_{layer}")

        total[x, y] += (
            hl.i32(detail1[x, y] * blend_mask1[x, y])
            + hl.i32(detail2[x, y] * blend_mask2[x, y])
        )

        # Step down one level: the blurred images become the new "sharp"
        # inputs, and images and masks are blurred again.
        sharp1, sharp2 = smooth1, smooth2
        smooth1 = gauss_7x7(smooth1, f"img1_layer_{layer}")
        smooth2 = gauss_7x7(smooth2, f"img2_layer_{layer}")
        blend_mask1 = gauss_7x7(blend_mask1, f"mask1_layer_{layer}")
        blend_mask2 = gauss_7x7(blend_mask2, f"mask2_layer_{layer}")

    # Add the remaining blurred band.
    total[x, y] += (
        hl.i32(smooth1[x, y] * blend_mask1[x, y])
        + hl.i32(smooth2[x, y] * blend_mask2[x, y])
    )

    fused[x, y] = hl.u16_sat(total[x, y])

    base_mask1.compute_root().parallel(y).vectorize(x, 16)
    total.compute_root().parallel(y).vectorize(x, 16)
    # (num_layers - 1) loop updates plus the final one: num_layers in total.
    for stage in range(num_layers):
        total.update(stage).parallel(y).vectorize(x, 16)

    return fused
def demosaic(input, width, height):
    """Interpolate a 2x2-tiled mosaic image into three colour channels.

    Four 5x5 integer kernels (centred on the pixel, offsets -2..2) are
    convolved with a mirror-interior-extended copy of ``input``; each sum is
    divided by the kernel's divisor and saturated to unsigned 16 bits. For
    every output channel, the parity of ``x`` and ``y`` selects which of the
    four convolutions to use; any combination not listed in the select falls
    through to the raw ``input[x, y]``.

    Args:
        input: Mosaic image indexable as ``input[x, y]``.
        width: Extent of the valid region along x.
        height: Extent of the valid region along y.

    Returns:
        The ``hl.Func`` named ``"demosaic_output"``, indexed ``[x, y, c]``.
    """
    print(f'width: {width}, height: {height}')

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    window = hl.RDom([(-2, 5), (-2, 5)])
    mirrored = hl.BoundaryConditions.mirror_interior(input, [(0, width), (0, height)])

    # One (taps, divisor) entry per kernel. Taps map (dx, dy) offsets to
    # coefficients; offsets that are not listed stay zero.
    kernel_specs = (
        (
            {
                (0, -2): -1,
                (0, -1): 2,
                (-2, 0): -1, (-1, 0): 2, (0, 0): 4, (1, 0): 2, (2, 0): -1,
                (0, 1): 2,
                (0, 2): -1,
            },
            8,
        ),
        (
            {
                (0, -2): 1,
                (-1, -1): -2, (1, -1): -2,
                (-2, 0): -2, (-1, 0): 8, (0, 0): 10, (1, 0): 8, (2, 0): -2,
                (-1, 1): -2, (1, 1): -2,
                (0, 2): 1,
            },
            16,
        ),
        (
            {
                (0, -2): -2,
                (-1, -1): -2, (0, -1): 8, (1, -1): -2,
                (-2, 0): 1, (0, 0): 10, (2, 0): 1,
                (-1, 1): -2, (0, 1): 8, (1, 1): -2,
                (0, 2): -2,
            },
            16,
        ),
        (
            {
                (0, -2): -3,
                (-1, -1): 4, (1, -1): 4,
                (-2, 0): -3, (0, 0): 12, (2, 0): -3,
                (-1, 1): 4, (1, 1): 4,
                (0, 2): -3,
            },
            16,
        ),
    )

    convolved = []
    for index, (taps, divisor) in enumerate(kernel_specs):
        # 5x5 buffer shifted so that index (0, 0) is the kernel centre.
        kernel = hl.Buffer(hl.Int(32), [5, 5], f"demosaic_f{index}")
        kernel.translate([-2, -2])
        kernel.fill(0)
        for (dx, dy), coefficient in taps.items():
            kernel[dx, dy] = coefficient

        stage = hl.Func(f"demosaic_{index}")
        stage[x, y] = hl.u16_sat(
            hl.sum(hl.i32(mirrored[x + window.x, y + window.y]) * kernel[window.x, window.y]) / divisor
        )
        convolved.append(stage)

    d0, d1, d2, d3 = convolved
    result = hl.Func("demosaic_output")

    # Position of the pixel inside its 2x2 tile, and the requested channel.
    row_even = y % 2 == 0
    row_odd = y % 2 != 0
    col_even = x % 2 == 0
    col_odd = x % 2 != 0
    want_r = c == 0
    want_g = c == 1
    want_b = c == 2

    result[x, y, c] = hl.select(
        want_r & row_even & col_odd, d1[x, y],
        want_r & row_odd & col_even, d2[x, y],
        want_r & row_odd & col_odd, d3[x, y],
        want_g & row_even & col_even, d0[x, y],
        want_g & row_odd & col_odd, d0[x, y],
        want_b & row_odd & col_even, d1[x, y],
        want_b & row_even & col_odd, d2[x, y],
        want_b & row_even & col_even, d3[x, y],
        input[x, y],
    )

    for stage in convolved:
        stage.compute_root().parallel(y).vectorize(x, 16)
    result.compute_root().parallel(y).align_bounds(x, 2).unroll(x, 2).align_bounds(y, 2).unroll(y, 2).vectorize(x, 16)

    return result