def objective(args):
    """Score one parameter set by running the filter over every frame.

    ``args['ksize']`` is coerced to an int in place, and a copy of `args`
    then replaces the filter's parameter dict.  For each frame of the
    module-level `source`, the estimated disparity (with the first `values`
    columns dropped) is compared against the downsampled ground truth using
    `cost`.

    Returns the mean per-frame cost, which is also printed with `args`.
    """
    args['ksize'] = int(args['ksize'])

    tracker = Filter(
        average_disparity, frame_down_factor, mem_down_factor,
        fovea_shape, frame_shape, values, verbose=False)
    tracker.params = dict(args)

    frame_costs = []
    for index in range(source.n_frames):
        # Left/right images of this frame, reduced to the working resolution.
        stereo_pair = [
            downsample(source.video[index][side], frame_down_factor)
            for side in (0, 1)
        ]
        estimate, _ = tracker.process_frame(
            source.positions[index], stereo_pair)
        reference = downsample(source.ground_truth[index], frame_down_factor)
        frame_costs.append(
            cost(estimate[:, values:], reference, average_disparity))

    mean_cost = np.mean(frame_costs)
    print(mean_cost, args)
    return mean_cost
def objective(args):
    """Mean point-wise disparity error of `coarse_bp` over all `sources`.

    `args` holds the belief-propagation keyword arguments under evaluation.
    The resulting mean error is stored in the module-level `errors` dict
    under ``arg_key(args)``, printed next to the parameters named in
    `space`, and returned.
    """
    show_debug = False  # flip on to inspect each frame and its disparity map
    total = 0.0

    for source, frame_ten, true_points in sources:
        full_shape = source.frame_ten[0].shape
        # The stereo pair is recomputed from the source rather than taken
        # from the tuple.
        frame_ten = [
            downsample(source.frame_ten[side], frame_down_factor)
            for side in (0, 1)
        ]

        # --- BP
        disp = coarse_bp(
            frame_ten, values=values, down_factor=0, iters=iters, **args)
        disp *= 2**frame_down_factor

        # --- compute error
        cost_i = cost_on_points(
            disp[:, values:], true_points, full_shape=full_shape)
        total += cost_i

        if show_debug:
            print(cost_i, total)
            plt.figure()
            rows, cols = 2, 1
            plt.subplot(rows, cols, 1)
            plt.imshow(frame_ten[0], cmap='gray')
            plt.subplot(rows, cols, 2)
            plt.imshow(disp, vmin=0, vmax=64)
            plt.show()

    error = total / len(sources)
    errors[arg_key(args)] = error

    described = ["%r: %s" % (key, args[key]) for key in sorted(space)]
    args_s = "{%s}" % ', '.join(described)
    print("%s: %s" % (error, args_s))
    return error
def foveation_sequence():
    """Plot five frames of a KITTI multi-view sequence with the fovea outlined.

    Every second frame of the first ten frames of sequence 15 is run through
    a `Filter` with no memory; the left image (trimmed with `trim`) is shown
    in its own subplot row and the first returned fovea is drawn in white.
    """
    import matplotlib.cm as cm

    frame_down_factor = 1
    mem_down_factor = 2  # relative to the frame down factor

    fs = 80
    fovea_shape = (fs, fs)
    full_values = 128
    # Floor division: `values` is used as a slice bound below, so it must
    # stay an int under Python 3 as well as Python 2.
    values = full_values // 2**frame_down_factor

    index = 15
    n_frames = 10
    source = KittiMultiViewSource(index, test=False, n_frames=n_frames)

    frame_ten = [downsample(source.frame_ten[0], frame_down_factor),
                 downsample(source.frame_ten[1], frame_down_factor)]
    frame_shape = frame_ten[0].shape

    average_disp = source.get_average_disparity()
    average_disp = cv2.pyrUp(average_disp)[
        :frame_shape[0], :frame_shape[1] - values]

    # Named to avoid shadowing the builtin `filter`.
    foveal_filter = Filter(
        average_disp, frame_down_factor, mem_down_factor, fovea_shape,
        frame_shape, values, verbose=False, memory_length=0)

    plt.figure()
    edge = 5
    for i in range(0, 10, 2):
        frame = [downsample(source.frame_sequence[i][0], frame_down_factor),
                 downsample(source.frame_sequence[i][1], frame_down_factor)]
        _, fovea_corners = foveal_filter.process_frame(None, frame)

        # Subplot indices must be integers: rows 1..5 for i = 0, 2, ..., 8.
        plt.subplot(5, 1, i // 2 + 1)
        plt.imshow(trim(frame[0], values, edge), cmap=cm.Greys_r)

        # Outline the first fovea as a closed rectangle.
        # NOTE(review): the corner is used without subtracting `values` /
        # `edge` although the image shown is trimmed -- confirm the overlay
        # lines up as intended.
        fi, fj = fovea_corners[0]
        fm = fs
        fn = fs
        plt.plot([fj, fj + fn, fj + fn, fj, fj],
                 [fi, fi, fi + fm, fi + fm, fi], 'white')
        remove_axes()

    # `pad` is passed by keyword; newer matplotlib rejects it positionally.
    plt.tight_layout(pad=-1)
    plt.show()
def get_test_case(source):
    """Collect the inputs needed to evaluate one frame of `source`.

    Returns a 4-tuple ``(frame_ten, frame_shape, average_disp, true_points)``:
    the stereo pair downsampled by the module-level `frame_down_factor`, the
    shape of its first image, the source's average disparity, and its
    non-occluded ground-truth points.

    Raises IOError if the source cannot provide an average disparity.
    """
    stereo_pair = [
        downsample(source.frame_ten[side], frame_down_factor)
        for side in (0, 1)
    ]
    pair_shape = stereo_pair[0].shape

    mean_disparity = source.get_average_disparity()  # may throw IOError
    ground_truth = source.get_ground_truth_points(occluded=False)

    return stereo_pair, pair_shape, mean_disparity, ground_truth
def objective(args):
    """Mean weighted point error of `Foveal` over the KITTI training frames.

    `args` is forwarded to `Foveal` as extra keyword arguments.  Frames
    listed as missing, and frames whose average disparity cannot be loaded,
    are skipped.  The mean error is recorded in the module-level `errors`
    dict under ``arg_key(args)``, printed with the parameters named in
    `space`, and returned.
    """
    missing_frames = [31, 82, 114]
    frame_costs = []

    for index in range(194):
        if index in missing_frames:
            continue

        source = KittiMultiViewSource(index, test=False, n_frames=0)
        full_shape = source.frame_ten[0].shape
        frame_ten = [
            downsample(source.frame_ten[side], frame_down_factor)
            for side in (0, 1)
        ]
        frame_shape = frame_ten[0].shape
        true_points = source.get_ground_truth_points(occluded=False)

        # Fovea shape: a near-square region covering `fovea_fraction` of the
        # frame area that lies right of the first `values` columns.
        fovea_shape = (0, 0)
        fovea_pixels = np.ceil(
            fovea_fraction * frame_shape[0] * (frame_shape[1] - values))
        if fovea_pixels > 0:
            fovea_height = np.minimum(
                frame_shape[0], np.ceil(fovea_pixels ** .5))
            fovea_width = np.minimum(
                frame_shape[1], np.ceil(fovea_pixels / fovea_height))
            fovea_shape = fovea_height, fovea_width

        # Average disparity (likely unavailable without a full 20 frames).
        try:
            average_disp = source.get_average_disparity()
        except IOError:
            print("Skipping index %d (lacks frames)" % index)
            continue
        average_disp = upsample_average_disp(
            average_disp, frame_down_factor, frame_shape, values)

        # Run the filter on this single frame.
        foveal = Foveal(
            average_disp, frame_down_factor, mem_down_factor,
            fovea_shape, frame_shape, values,
            iters=iters, fovea_levels=fovea_levels, **args)
        disp, _ = foveal.process_frame(frame_ten)

        frame_costs.append(cost_on_points(
            disp[:, values:], true_points, average_disp,
            full_shape=full_shape, clip=error_clip))

    error = np.mean(frame_costs)
    errors[arg_key(args)] = error

    described = ["%r: %s" % (key, args[key]) for key in sorted(space)]
    args_s = "{%s}" % ', '.join(described)
    print("%s: %s" % (error, args_s))
    return error
def process_frame(frame):
    """Estimate a disparity map for one stereo pair using `foveal_bp`.

    `frame` is a pair of images.  Both are padded (by ``values + 6`` on the
    left and 6 on the other sides), downsampled by one level, and passed to
    `foveal_bp` with a zero-size fovea.  The result is scaled back up,
    upsampled to the padded frame shape and unpadded.

    Reads the module-level `values` (disparity range) and `fast` flag.
    Returns the disparity image.
    """
    params = {
        'data_exp': 1.09821084614,
        'data_max': 82.191597317,
        'data_weight': 0.0139569211273,
        'disc_max': 15.1301410452,
    }
    iters = 5
    # NOTE(review): an unused local previously set laplacian_ksize to 3 while
    # the call below has always passed 1 -- confirm which value is intended.

    left, right, top, bottom = values + 6, 6, 6, 6
    frame = (pad(frame[0], left, right, top, bottom),
             pad(frame[1], left, right, top, bottom))

    down_factor = 1
    # Floor division keeps the disparity count an int under Python 3 too.
    values_coarse = values // 2**down_factor
    img1d = downsample(frame[0], down_factor)
    img2d = downsample(frame[1], down_factor)

    if fast:
        fovea_levels = 2
        min_level = 1
    else:
        fovea_levels = 1
        min_level = 0

    # Zero-size fovea at the origin.
    fovea_corner = np.asarray((0, 0))
    fovea_shape = np.asarray((0, 0))
    disp = foveal_bp(
        (img1d, img2d), fovea_corner, fovea_shape, seed=None,
        values=values_coarse, iters=iters, levels=5,
        fovea_levels=fovea_levels, min_level=min_level,
        laplacian_ksize=1, laplacian_scale=0.5, post_smooth=None, **params)

    # Express disparities in the units of the padded input, then restore
    # the input geometry.
    disp *= 2**down_factor
    disp = upsample(disp, down_factor, frame[0].shape)
    disp = unpad(disp, left, right, top, bottom)

    # A Gaussian post-smooth (9x9, sigma 3) was found to help slightly but
    # is left out to keep this simple and fast.
    return disp
def objective(args):
    """Average `coarse_bp` point error across every entry of `sources`.

    `args` carries the belief-propagation keyword arguments being scored.
    The mean error is saved in the module-level `errors` dict under
    ``arg_key(args)``, printed with the parameters named in `space`, and
    returned.
    """
    debug_plots = False  # set True to view each frame with its disparity
    running_error = 0.0

    for source, frame_ten, true_points in sources:
        full_shape = source.frame_ten[0].shape
        # Rebuild the downsampled stereo pair from the source images.
        frame_ten = [
            downsample(image, frame_down_factor)
            for image in (source.frame_ten[0], source.frame_ten[1])
        ]

        # --- BP
        disp = coarse_bp(
            frame_ten, values=values, down_factor=0, iters=iters, **args)
        disp *= 2**frame_down_factor

        # --- compute error
        cost_i = cost_on_points(
            disp[:, values:], true_points, full_shape=full_shape)
        running_error += cost_i

        if debug_plots:
            print(cost_i, running_error)
            plt.figure()
            rows, cols = 2, 1
            plt.subplot(rows, cols, 1)
            plt.imshow(frame_ten[0], cmap='gray')
            plt.subplot(rows, cols, 2)
            plt.imshow(disp, vmin=0, vmax=64)
            plt.show()

    error = running_error / len(sources)
    errors[arg_key(args)] = error

    parts = ["%r: %s" % (name, args[name]) for name in sorted(space)]
    args_s = "{%s}" % ', '.join(parts)
    print("%s: %s" % (error, args_s))
    return error
def process_frame(frame):
    """Estimate a disparity map for one stereo pair using `foveal_bp`.

    `frame` is a pair of images.  Both are padded (by ``values + 6`` on the
    left and 6 on the other sides), downsampled by one level, and passed to
    `foveal_bp` with a zero-size fovea.  The result is scaled back up,
    upsampled to the padded frame shape and unpadded.

    Reads the module-level `values` (disparity range) and `fast` flag.
    Returns the disparity image.
    """
    params = {
        'data_exp': 1.09821084614,
        'data_max': 82.191597317,
        'data_weight': 0.0139569211273,
        'disc_max': 15.1301410452,
    }
    iters = 5
    # NOTE(review): an unused local previously set laplacian_ksize to 3 while
    # the call below has always passed 1 -- confirm which value is intended.

    left, right, top, bottom = values + 6, 6, 6, 6
    frame = (pad(frame[0], left, right, top, bottom),
             pad(frame[1], left, right, top, bottom))

    down_factor = 1
    # Floor division keeps the disparity count an int under Python 3 too.
    values_coarse = values // 2**down_factor
    img1d = downsample(frame[0], down_factor)
    img2d = downsample(frame[1], down_factor)

    if fast:
        fovea_levels = 2
        min_level = 1
    else:
        fovea_levels = 1
        min_level = 0

    # Zero-size fovea at the origin.
    fovea_corner = np.asarray((0, 0))
    fovea_shape = np.asarray((0, 0))
    disp = foveal_bp(
        (img1d, img2d), fovea_corner, fovea_shape, seed=None,
        values=values_coarse, iters=iters, levels=5,
        fovea_levels=fovea_levels, min_level=min_level,
        laplacian_ksize=1, laplacian_scale=0.5, post_smooth=None, **params)

    # Express disparities in the units of the padded input, then restore
    # the input geometry.
    disp *= 2**down_factor
    disp = upsample(disp, down_factor, frame[0].shape)
    disp = unpad(disp, left, right, top, bottom)

    # A Gaussian post-smooth (9x9, sigma 3) was found to help slightly but
    # is left out to keep this simple and fast.
    return disp
def objective(args):
    """Evaluate a filter parameter set on all frames of the global `source`.

    ``args['ksize']`` is converted to an int in place; a copy of `args` is
    installed as the filter's parameters.  The disparity estimated for each
    frame (minus its first `values` columns) is scored against the
    downsampled ground truth with `cost`.

    Prints and returns the mean of the per-frame costs.
    """
    args['ksize'] = int(args['ksize'])

    disparity_filter = Filter(
        average_disparity, frame_down_factor, mem_down_factor,
        fovea_shape, frame_shape, values, verbose=False)
    disparity_filter.params = dict(args)

    per_frame = []
    for k in range(source.n_frames):
        pair = [downsample(source.video[k][view], frame_down_factor)
                for view in (0, 1)]
        disparity, _ = disparity_filter.process_frame(
            source.positions[k], pair)
        truth = downsample(source.ground_truth[k], frame_down_factor)
        per_frame.append(cost(disparity[:, values:], truth, average_disparity))

    mean_cost = np.mean(per_frame)
    print(mean_cost, args)
    return mean_cost
def __init__(self, average_disparity, frame_down_factor, mem_down_factor,
             fovea_shape, frame_shape, values, max_n_foveas=1, **bp_args):
    """Store the frame geometry and belief-propagation settings.

    `average_disparity` is downsampled by `mem_down_factor` before being
    stored and handed to `UnusuallyClose`.  Any extra keyword arguments in
    `bp_args` override the default belief-propagation parameters.
    """
    # Geometry and sizes.
    self.frame_shape = frame_shape
    self.fovea_shape = fovea_shape
    self.values = values
    self.max_n_foveas = max_n_foveas

    # Downsampling factors and the pixel steps they correspond to.
    self.mem_down_factor = mem_down_factor
    self.frame_step = 2**frame_down_factor
    self.mem_step = 2**mem_down_factor

    self.post_smooth = None

    # Default BP parameters.  An alternative set, optimized for
    # frame_down: 1, mem_down: 2, fovea_levels: 2, was:
    #   'data_exp': 14.2348581842, 'data_max': 79101007093.4,
    #   'data_weight': 0.000102496570364, 'disc_max': 4.93508276126,
    #   'laplacian_ksize': 5, 'laplacian_scale': 0.38937704644,
    #   'smooth': 0.00146126755993
    bp_params = {
        'data_exp': 1.09821084614,
        'data_max': 112.191597317,
        'data_weight': 0.0139569211273,
        'disc_max': 12.1301410452,
        'laplacian_ksize': 3,
        'smooth': 1.84510833504e-07,
    }
    bp_params.update(bp_args)
    self.params = bp_params

    coarse_average = downsample(average_disparity,
                                down_factor=mem_down_factor)
    self.average_disparity = coarse_average
    self._uc = UnusuallyClose(coarse_average)
# --- setup iters = 3 full_values = 128 frame_down_factor = 1 assert full_values % 2**frame_down_factor == 0 values = full_values / 2**frame_down_factor sources = [] # for index in range(1): # for index in range(30): for index in range(194): source = KittiMultiViewSource(index, test=False) frame_ten = [downsample(source.frame_ten[0], frame_down_factor), downsample(source.frame_ten[1], frame_down_factor)] true_points = source.get_ground_truth_points(occluded=False) sources.append((source, frame_ten, true_points)) r = np.log(10) space = collections.OrderedDict([ ('laplacian_ksize', pyll.scope.int(1 + hp.quniform('laplacian_ksize', 0, 20, 2))), ('laplacian_scale', hp.lognormal('laplacian_scale', 0, r)), ('data_weight', hp.lognormal('data_weight', -2*r, r)), ('data_max', hp.lognormal('data_max', 2*r, r)), ('data_exp', hp.lognormal('data_exp', 0, r)), ('disc_max', hp.lognormal('disc_max', r, r)), ('smooth', hp.lognormal('smooth', 0, r)),
def rationale():
    """
    Figure that illustrates rationale for fovea approach, i.e. diminishing
    returns with increasing runtime via further iterations at a given
    downfactor, but possibly too-high cost of using lower downfactor.

    For each iteration count, `_evaluate_frame` is run on the first
    `n_frames` KITTI frames at one extra level of downsampling ("coarse")
    and at the frame's own level ("fine"); mean cost is plotted against
    mean runtime for both.
    """
    n_frames = 50  # the full set would be 194
    frame_down_factor = 1
    coarse_down_factor = frame_down_factor + 1

    # Shapes are taken from the first frame and reused for every frame.
    first = KittiMultiViewSource(0)
    full_shape = first.frame_ten[0].shape
    frame_shape = downsample(first.frame_ten[0], frame_down_factor).shape

    params = {
        'data_weight': 0.16145115747533928,
        'disc_max': 294.1504935618425,
        'data_max': 32.024780646200725,
        'laplacian_ksize': 3,
    }

    iterations = [1, 2, 3, 4, 5, 7, 10, 15]

    mean_coarse_times, mean_coarse_costs = [], []
    mean_fine_times, mean_fine_costs = [], []

    for n_iters in iterations:
        print(str(n_iters) + ' iterations')
        coarse_runs = []  # (time, cost) per frame
        fine_runs = []

        for frame_index in range(n_frames):
            print(' frame ' + str(frame_index))
            source = KittiMultiViewSource(frame_index)
            true_points = source.get_ground_truth_points(occluded=False)

            coarse_runs.append(_evaluate_frame(
                source.frame_ten, true_points, frame_shape, full_shape,
                coarse_down_factor, frame_down_factor, n_iters, params))
            fine_runs.append(_evaluate_frame(
                source.frame_ten, true_points, frame_shape, full_shape,
                frame_down_factor, frame_down_factor, n_iters, params))

        mean_coarse_times.append(np.mean([t for t, _ in coarse_runs]))
        mean_coarse_costs.append(np.mean([c for _, c in coarse_runs]))
        mean_fine_times.append(np.mean([t for t, _ in fine_runs]))
        mean_fine_costs.append(np.mean([c for _, c in fine_runs]))

    for series in (mean_coarse_times, mean_coarse_costs,
                   mean_fine_times, mean_fine_costs):
        print(series)

    # Squares: coarse; circles: fine.
    plt.plot(mean_coarse_times, mean_coarse_costs,
             color='k', marker='s', markersize=12)
    plt.plot(mean_fine_times, mean_fine_costs,
             color='k', marker='o', markersize=12)
    plt.xlabel('Runtime (s)', fontsize=18)
    plt.ylabel('Mean absolute disparity error (pixels)', fontsize=18)
    plt.gca().tick_params(labelsize='18')
    plt.show()
# --- setup iters = 3 full_values = 128 frame_down_factor = 1 assert full_values % 2**frame_down_factor == 0 values = full_values / 2**frame_down_factor sources = [] # for index in range(1): # for index in range(30): for index in range(194): source = KittiMultiViewSource(index, test=False) frame_ten = [ downsample(source.frame_ten[0], frame_down_factor), downsample(source.frame_ten[1], frame_down_factor) ] true_points = source.get_ground_truth_points(occluded=False) sources.append((source, frame_ten, true_points)) r = np.log(10) space = collections.OrderedDict([ ('laplacian_ksize', pyll.scope.int(1 + hp.quniform('laplacian_ksize', 0, 20, 2))), ('laplacian_scale', hp.lognormal('laplacian_scale', 0, r)), ('data_weight', hp.lognormal('data_weight', -2 * r, r)), ('data_max', hp.lognormal('data_max', 2 * r, r)), ('data_exp', hp.lognormal('data_exp', 0, r)), ('disc_max', hp.lognormal('disc_max', r, r)),
def foveation_sequence():
    """Plot five frames of a KITTI multi-view sequence with the fovea outlined.

    Every second frame of the first ten frames of sequence 15 is run through
    a `Filter` with no memory; the left image (trimmed with `trim`) is shown
    in its own subplot row and the first returned fovea is drawn in white.
    """
    import matplotlib.cm as cm

    frame_down_factor = 1
    mem_down_factor = 2  # relative to the frame down factor

    fs = 80
    fovea_shape = (fs, fs)
    full_values = 128
    # Floor division: `values` is used as a slice bound below, so it must
    # stay an int under Python 3 as well as Python 2.
    values = full_values // 2**frame_down_factor

    index = 15
    n_frames = 10
    source = KittiMultiViewSource(index, test=False, n_frames=n_frames)

    frame_ten = [
        downsample(source.frame_ten[0], frame_down_factor),
        downsample(source.frame_ten[1], frame_down_factor)
    ]
    frame_shape = frame_ten[0].shape

    average_disp = source.get_average_disparity()
    average_disp = cv2.pyrUp(average_disp)[
        :frame_shape[0], :frame_shape[1] - values]

    # Named to avoid shadowing the builtin `filter`.
    foveal_filter = Filter(
        average_disp, frame_down_factor, mem_down_factor, fovea_shape,
        frame_shape, values, verbose=False, memory_length=0)

    plt.figure()
    edge = 5
    for i in range(0, 10, 2):
        frame = [
            downsample(source.frame_sequence[i][0], frame_down_factor),
            downsample(source.frame_sequence[i][1], frame_down_factor)
        ]
        _, fovea_corners = foveal_filter.process_frame(None, frame)

        # Subplot indices must be integers: rows 1..5 for i = 0, 2, ..., 8.
        plt.subplot(5, 1, i // 2 + 1)
        plt.imshow(trim(frame[0], values, edge), cmap=cm.Greys_r)

        # Outline the first fovea as a closed rectangle.
        # NOTE(review): the corner is used without subtracting `values` /
        # `edge` although the image shown is trimmed -- confirm the overlay
        # lines up as intended.
        fi, fj = fovea_corners[0]
        fm = fs
        fn = fs
        plt.plot([fj, fj + fn, fj + fn, fj, fj],
                 [fi, fi, fi + fm, fi + fm, fi], 'white')
        remove_axes()

    # `pad` is passed by keyword; newer matplotlib rejects it positionally.
    plt.tight_layout(pad=-1)
    plt.show()
frame = load_stereo_frame(idrive, iframe) points = load_disparity_points(idrive, iframe) full_values = 128 frame_down_factor = 1 values = full_values / 2**frame_down_factor iters = 3 params = { 'data_weight': 0.16145115747533928, 'disc_max': 294.1504935618425, 'data_max': 32.024780646200725, 'ksize': 3 } frame = (downsample(frame[0], frame_down_factor), downsample(frame[1], frame_down_factor)) tic() coarse_disp = coarse_bp(frame, values=values, down_factor=1, iters=iters, **params) coarse_disp *= 2**frame_down_factor toc() tic() fine_disp = coarse_bp(frame, values=values, down_factor=0,
def test_foveal(frame_down_factor, fovea_fraction, fovea_n, post_smooth=None,
                **kwargs):
    """Time and score a static fovea and several moving-fovea settings.

    Arguments
    ---------
    frame_down_factor - downsampling level applied to the test frames
    fovea_fraction - fraction of the usable frame area given to the fovea
    fovea_n - iterable of `max_n_foveas` values to try with `Foveal`
    post_smooth - forwarded to `foveal_bp` and set on each `Foveal`
    kwargs - overrides for the default belief-propagation parameters

    Returns ``(times, unweighted_cost, weighted_cost)``, each an array of
    shape ``(len(fovea_n) + 1, n_test_frames)``.  Row 0 holds the static
    fovea; row ``i + 1`` holds ``fovea_n[i]``.  Columns of frames that are
    skipped (missing frames, or no average disparity) are left at zero.
    """
    # Floor division: `values` is used for slicing below and must be an int
    # under Python 3 as well as Python 2.
    values = full_values // 2**frame_down_factor
    mem_down_factor = 2  # relative to the frame down factor

    shape = (len(fovea_n) + 1, n_test_frames)
    times = np.zeros(shape)
    unweighted_cost = np.zeros(shape)
    weighted_cost = np.zeros(shape)

    for i_frame, index in enumerate(range(n_test_frames)):
        if index in [31, 82, 114]:  # missing frames
            continue

        n_history_frames = 0
        source = KittiMultiViewSource(index, test=False,
                                      n_frames=n_history_frames)
        full_shape = source.frame_ten[0].shape

        try:
            frame_ten, frame_shape, average_disp, true_points = \
                get_test_case(source)
        except IOError:  # likely does not have a full 20 frames
            print("Skipping index %d (lacks frames)" % index)
            continue

        frame_shape = frame_ten[0].shape
        average_disp = upsample_average_disp(
            average_disp, frame_down_factor, frame_shape, values=values)

        true_disp = points_image(true_points, frame_shape,
                                 full_shape=full_shape)
        # Slice bound must be an int, hence floor division.
        true_disp_d = downsample(true_disp, mem_down_factor)[
            :, values // 2**mem_down_factor:]
        average_disp_d = downsample(average_disp, mem_down_factor)

        # Near-square fovea covering `fovea_fraction` of the frame area that
        # lies right of the first `values` columns.
        fovea_pixels = np.ceil(
            fovea_fraction * frame_shape[0] * (frame_shape[1] - values))
        if fovea_pixels > 0:
            fovea_height = np.minimum(frame_shape[0],
                                      np.ceil(fovea_pixels**.5))
            fovea_width = np.minimum(frame_shape[1],
                                     np.ceil(fovea_pixels / fovea_height))
            fovea_shape = fovea_height, fovea_width
        else:
            fovea_shape = (0, 0)

        # --- static fovea
        # Default BP parameters.  An alternative set, optimized for
        # frame_down: 1, mem_down: 2, fovea_levels: 1, was:
        #   'data_exp': 14.2348581842, 'data_max': 79101007093.4,
        #   'data_weight': 0.000102496570364, 'disc_max': 4.93508276126,
        #   'laplacian_ksize': 5, 'laplacian_scale': 0.38937704644,
        #   'smooth': 0.00146126755993
        params = {
            'data_exp': 1.09821084614,
            'data_max': 112.191597317,
            'data_weight': 0.0139569211273,
            'disc_max': 12.1301410452,
            'laplacian_ksize': 3,
            'smooth': 1.84510833504e-07
        }
        params.update(kwargs)

        # NOTE(review): the row offset is halved but the column offset is
        # not, which puts the static fovea against the right edge -- confirm
        # this is intended.
        fovea_corner = ((frame_shape[0] - fovea_shape[0]) / 2,
                        (frame_shape[1] - fovea_shape[1]))

        t = time.time()
        foveal_disp = foveal_bp(frame_ten, np.array(fovea_corner),
                                fovea_shape, values=values,
                                post_smooth=post_smooth, **params)
        foveal_disp *= 2**frame_down_factor
        times[0, i_frame] = time.time() - t

        unweighted_cost[0, i_frame] = cost_on_points(
            foveal_disp[:, values:], true_points,
            full_shape=full_shape, clip=error_clip)
        weighted_cost[0, i_frame] = cost_on_points(
            foveal_disp[:, values:], true_points, average_disp,
            full_shape=full_shape, clip=error_clip)

        # --- moving foveas
        for i_fovea, fovea_n_i in enumerate(fovea_n):
            foveal = Foveal(average_disp, frame_down_factor, mem_down_factor,
                            fovea_shape, frame_shape, values,
                            max_n_foveas=fovea_n_i, **params)
            foveal.post_smooth = post_smooth

            if 0:
                # use ground truth importance (disabled)
                pos_weights = get_position_weights(true_disp_d.shape)
                importance = get_importance(pos_weights, average_disp_d,
                                            true_disp_d)
                importance[true_disp_d == 0] = 0

                # zero the importance within `edge` pixels of the border
                edge = 3
                mask = np.zeros(importance.shape, dtype=bool)
                mask[edge:-edge, edge:-edge] = 1
                importance[~mask] = 0

                foveal_disp, fovea_corner = foveal.process_frame(
                    frame_ten, cost=importance)
            else:
                # use estimated importance
                foveal_disp, fovea_corner = foveal.process_frame(frame_ten)

            # Time reported by the Foveal instance itself.
            foveal_time = foveal.bp_time

            unweighted_cost[i_fovea + 1, i_frame] = cost_on_points(
                foveal_disp[:, values:], true_points,
                full_shape=full_shape, clip=error_clip)
            weighted_cost[i_fovea + 1, i_frame] = cost_on_points(
                foveal_disp[:, values:], true_points, average_disp,
                full_shape=full_shape, clip=error_clip)
            times[i_fovea + 1, i_frame] = foveal_time

    return times, unweighted_cost, weighted_cost
disp[1:,:] = np.maximum(disp[1:,:], disp[0:-1,:]) def interp(disp): valid_mask = disp >= 0 coords = np.array(np.nonzero(valid_mask)).T values = disp[valid_mask] it = interpolate.NearestNDInterpolator(coords, values) # it = interpolate.LinearNDInterpolator(coords, values, fill_value=0) return it(list(np.ndindex(disp.shape))).reshape(disp.shape) if __name__ == '__main__': down_factor = 2 step = 2**down_factor; source = KittiSource(51, 20) gt = downsample(source.ground_truth[0], down_factor=down_factor) fig = plt.figure(1) fig.clf() h = plt.imshow(gt, vmin=0, vmax=64) plt.show(block=False) shape = gt.shape mem = DisparityMemory(shape, down_factor, fovea_shape=(30,90), fill_method='smudge') for i in range(20): gt = downsample(source.ground_truth[i], down_factor=down_factor) pos = source.positions[i] mem.move(pos)
def process_frame(self, pos, frame):
    """Choose fovea position(s), run foveal BP on `frame`, update memories.

    Arguments
    ---------
    pos - passed to the `move` / `remember` calls of the memories
    frame - stereo pair handed to `foveal_bp` (and `coarse_bp` when the
        disparity memory has length zero)

    Returns ``(disp, fovea_corners)``: the disparity image multiplied by
    `self.frame_step`, and an int32 array with one (row, col) corner per
    fovea in frame coordinates.  The time spent in `foveal_bp` is stored in
    `self.bp_time`.
    """
    start_time = time.time()

    self.disparity_memory.move(pos)
    if self.n_past_fovea > 0:
        self.fovea_memory.move(pos)
    if self.use_uncertainty:
        self.uncertainty_memory.move(pos)

    if self.verbose:
        print('move time: ' + str(time.time() - start_time))

    # 1. Decide where to put fovea and move it there:
    if (self.disparity_memory.n > 0
            and len(self.disparity_memory.transforms) == 0):
        # Memory is in use but holds nothing yet: centre a single fovea.
        # `fovea_shape` and `fovea_corners` are bound here as well, because
        # the BP call below reads them on every path.
        fovea_shape = np.array(self.fovea_shape)
        fovea_corner = (np.array(self.frame_shape) - fovea_shape) // 2
        assert all(fovea_corner >= 0)
        fovea_corners = np.array([fovea_corner], dtype='int32')
    else:
        # Width of the disparity margin at memory resolution; floor division
        # because it is used as a slice bound.
        mem_values = self.values // self.mem_step

        # a) Transform disparity from previous frame and calculate importance
        if self.disparity_memory.n > 0:
            # in current frame coords
            prior_disparity = self.disparity_memory.transforms[0]
        else:
            # TODO: we don't have GPS for multiview, so we're temporarily
            # replacing past estimate with coarse estimate from current frame
            params = {
                'data_weight': 0.16145115747533928,
                'disc_max': 294.1504935618425,
                'data_max': 32.024780646200725,
                'laplacian_ksize': 3}
            prior_disparity = coarse_bp(
                frame, down_factor=self.mem_down_factor, iters=5,
                values=self.values, **params)
            prior_disparity *= self.frame_step
            prior_disparity = prior_disparity[:, mem_values:]

        importance = self._uc.get_importance(prior_disparity)

        # b) Transform uncertainty from previous frame and multiply by
        #    importance
        if self.use_uncertainty:
            uncertainty = np.ones_like(importance)
            if len(self.uncertainty_memory.transforms) > 0:
                uncertainty = self.uncertainty_memory.transforms[0]
            cost = importance * uncertainty
        else:
            cost = importance

        assert cost.shape == self.memory_shape

        # c) Find region(s) of highest cost and put foveas there
        # NOTE(review): `/` is true division under Python 3, making this a
        # float array -- confirm `_choose_foveas` accepts that.
        mem_fovea_shape = np.array(self.fovea_shape) / self.mem_step
        fovea_corners, mem_fovea_shape = _choose_foveas(
            cost, mem_fovea_shape, mem_values, self.max_n_foveas)

        # rescale shape and corners and trim fovea to image ...
        # (not redundant: _choose_foveas chooses the multifovea shape)
        fovea_shape = np.array(mem_fovea_shape) * self.mem_step
        fovea_corners = np.array(fovea_corners, dtype='int32')
        fovea_max = np.array(self.frame_shape) - fovea_shape
        for i in range(len(fovea_corners)):
            fovea_corners[i] = (np.array(fovea_corners[i]) * self.mem_step
                                + np.array([0, self.values]))

            # fovea corner can be slightly too large, due to rounding errors
            fovea_corners[i] = np.minimum(fovea_corners[i], fovea_max)
            assert (fovea_corners[i][0] >= 0
                    and fovea_corners[i][1] >= self.values)
            assert all(fovea_corners[i] + fovea_shape <= self.frame_shape)

    if self.verbose:
        print('choose time: ' + str(time.time() - start_time))

    # 2. Calculate disparity and store in memory:
    if len(self.fovea_memory.transforms) == 0:
        seed = np.zeros((0, 0), dtype='uint8')
    else:
        seed = np.zeros(self.frame_shape, dtype='uint8')
        for t in self.fovea_memory.transforms:
            seed += t

    if self.verbose:
        print('seed time: ' + str(time.time() - start_time))

    # --- fovea boundaries in frame coordinates ...
    bp_time = time.time()
    disp = foveal_bp(
        frame, fovea_corners, fovea_shape, seed,
        values=self.values, **self.params)
    self.bp_time = time.time() - bp_time

    # keep all disparities in full image coordinates
    disp *= self.frame_step

    if self.verbose:
        print('BP time: ' + str(time.time() - start_time))

    # --- downsample and remember disparity
    downsampled = downsample(disp[:, self.values:], self.mem_down_factor)
    assert downsampled.shape == self.memory_shape
    self.disparity_memory.remember(pos, downsampled)

    if self.n_past_fovea > 0:
        # TODO: use multiple fovea_corners in history.  Only the first corner
        # is remembered; it is bound on both branches above.
        self.fovea_memory.remember(pos, disp, fovea_corner=fovea_corners[0])

    # 3. Calculate uncertainty and store in memory
    if self.use_uncertainty and len(self.disparity_memory.transforms) > 0:
        prior_disparity = self.disparity_memory.transforms[0]
        uncertainty = np.abs(downsampled - prior_disparity)
        self.uncertainty_memory.remember(pos, uncertainty)

    if self.verbose:
        print('finish time: ' + str(time.time() - start_time))

    return disp, fovea_corners
def __init__(self, average_disparity, frame_down_factor, mem_down_factor,
             fovea_shape, frame_shape, values, verbose=False,
             memory_length=1, max_n_foveas=1, **bp_args):
    """
    Arguments
    ---------
    average_disparity - full resolution mean-disparity image
    frame_down_factor - number of times incoming frame has already been
        downsampled
    mem_down_factor - number of additional times that memory is downsampled
    fovea_shape - at full resolution
    values - depth of disparity volume at full resolution
    verbose - print timing information while processing frames
    memory_length - length `n` of the disparity and uncertainty memories
    max_n_foveas - upper bound on the number of foveas per frame
    bp_args - overrides for the default belief-propagation parameters
    """
    # Behaviour switches.
    self.verbose = verbose
    self.use_uncertainty = False
    self.n_past_fovea = 0

    # Sizes handed in by the caller.
    self.frame_shape = frame_shape
    self.fovea_shape = fovea_shape
    self.values = values
    self.max_n_foveas = max_n_foveas

    # Downsampling factors and the pixel steps they imply; `mem_step` is the
    # step size for uncertainty and importance calculations (pixels).
    self.mem_down_factor = mem_down_factor
    self.frame_step = 2**frame_down_factor
    self.mem_step = 2**mem_down_factor

    # Mean disparity at memory resolution; its shape defines the memory.
    coarse_average = downsample(average_disparity,
                                down_factor=mem_down_factor)
    self.average_disparity = coarse_average
    self.memory_shape = coarse_average.shape

    # Parameter sets tried previously:
    #   original hyperopt / coarse:
    #     'data_weight': 0.16145115747533928, 'disc_max': 294.1504935618425,
    #     'data_max': 32.024780646200725, 'laplacian_ksize': 3
    #   hyperopt on 100 images:
    #     'data_weight': 0.15109941436798274, 'disc_max': 44.43671813879002,
    #     'data_max': 68.407170602610137, 'laplacian_ksize': 5
    #   Bryan's hyperopt on 250 images:
    #     'data_weight': 0.2715404479972163, 'disc_max': 2.603682635476145,
    #     'data_max': 156312.43116792402, 'laplacian_ksize': 3
    #   random:
    #     'data_weight': 1.2, 'disc_max': 924.0,
    #     'data_max': 189.0, 'laplacian_ksize': 5
    #   optimized for frame_down: 1, mem_down: 2, fovea_levels: 1:
    #     'data_exp': 14.2348581842, 'data_max': 79101007093.4,
    #     'data_weight': 0.000102496570364, 'disc_max': 4.93508276126,
    #     'laplacian_ksize': 5, 'laplacian_scale': 0.38937704644,
    #     'smooth': 0.00146126755993
    bp_params = {
        'data_exp': 1.09821084614,
        'data_max': 112.191597317,
        'data_weight': 0.0139569211273,
        'disc_max': 12.1301410452,
        'laplacian_ksize': 3,
        'smooth': 1.84510833504e-07,
    }
    bp_params.update(bp_args)
    self.params = bp_params

    # Memories for past disparity, uncertainty and fovea contents.
    self.disparity_memory = DisparityMemory(
        self.memory_shape, n=memory_length)
    self.uncertainty_memory = DisparityMemory(
        self.memory_shape, n=memory_length)
    self.fovea_memory = DisparityMemory(
        frame_shape, fovea_shape=fovea_shape, n=self.n_past_fovea)

    self._uc = UnusuallyClose(self.average_disparity)
############################################### # source = KittiSource(51, 5) # source = KittiSource(51, 30) # source = KittiSource(51, 100) # source = KittiSource(51, 249) source = KittiSource(51, 10) # source = KittiSource(91, None) frame_down_factor = 1 mem_down_factor = 2 # relative to the frame down factor coarse_down_factor = 2 # for the coarse comparison full_values = 128 frame_shape = downsample(source.video[0][0], frame_down_factor).shape # fovea_shape = np.array(frame_shape)/4 # fovea_shape = (0, 0) fovea_shape = (80, 80) # fovea_shape = (120, 120) average_disp = downsample( get_average_disparity(source.ground_truth), frame_down_factor) values = frame_shape[1] - average_disp.shape[1] filter = Filter(average_disp, frame_down_factor, mem_down_factor, fovea_shape, frame_shape, values, verbose=False, memory_length=0) fig = plt.figure(1) plt.show(block=False) table = defaultdict(list)
from kitti.data import Calib from kitti.raw import load_stereo_video, load_video_odometry from kitti.velodyne import load_disparity_points from bp_wrapper import downsample from data import KittiSource from importance import get_average_disparity from filter import Filter, cost drive = 51 n_frames = 100 # n_frames = None source = KittiSource(drive, n_frames) frame_down_factor = 1 frame_shape = downsample(source.video[0][0], frame_down_factor).shape fovea_shape = (80, 80) average_disparity = downsample( get_average_disparity(source.ground_truth), frame_down_factor) values = frame_shape[1] - average_disparity.shape[1] mem_down_factor = 1 def objective(args): args['ksize'] = int(args['ksize']) filter = Filter(average_disparity, frame_down_factor, mem_down_factor, fovea_shape, frame_shape, values, verbose=False) filter.params = dict(args)
def interp(disp):
    """Replace negative entries of `disp` with the nearest non-negative value.

    Entries ``>= 0`` are treated as known samples; every position of the
    array is then looked up with a nearest-neighbour interpolator built
    from those samples.  Returns an array with the same shape as `disp`.
    """
    known = disp >= 0
    sample_coords = np.argwhere(known)
    sample_values = disp[known]

    nearest = interpolate.NearestNDInterpolator(sample_coords, sample_values)
    # (A LinearNDInterpolator with fill_value=0 was the alternative here.)

    every_index = list(np.ndindex(disp.shape))
    filled = nearest(every_index)
    return filled.reshape(disp.shape)


if __name__ == '__main__':
    down_factor = 2
    step = 2**down_factor

    source = KittiSource(51, 20)
    gt = downsample(source.ground_truth[0], down_factor=down_factor)

    fig = plt.figure(1)
    fig.clf()
    h = plt.imshow(gt, vmin=0, vmax=64)
    plt.show(block=False)

    shape = gt.shape
    mem = DisparityMemory(shape, down_factor, fovea_shape=(30, 90),
                          fill_method='smudge')

    for i in range(20):
        gt = downsample(source.ground_truth[i], down_factor=down_factor)
def process_frame(self, pos, frame):
    """Estimate disparity for one stereo frame, choosing fovea(s) first.

    The memories are moved to ``pos``, fovea corner(s) are chosen from the
    importance (optionally weighted by uncertainty) of a prior disparity
    estimate, ``foveal_bp`` is run on the frame, and a downsampled copy of
    the result is stored in the disparity memory.

    Arguments
    ---------
    pos - forwarded to the memories' ``move`` / ``remember`` (presumably the
        camera position for this frame -- confirm against DisparityMemory)
    frame - stereo image pair handed to ``foveal_bp`` / ``coarse_bp``

    Returns
    -------
    disp - output of ``foveal_bp`` multiplied by ``self.frame_step``
    fovea_corners - int32 array with one (row, col) corner per fovea, in
        frame coordinates
    """
    start_time = time.time()

    self.disparity_memory.move(pos)
    if self.n_past_fovea > 0:
        self.fovea_memory.move(pos)
    if self.use_uncertainty:
        self.uncertainty_memory.move(pos)

    if self.verbose:
        print('move time: ' + str(time.time() - start_time))

    # 1. Decide where to put fovea and move it there:
    if (self.disparity_memory.n > 0
            and len(self.disparity_memory.transforms) == 0):
        # Memory is in use but holds nothing yet: centre a single fovea.
        # fovea_shape and fovea_corners must be defined here as well, since
        # both are used by foveal_bp and the return statement below.
        fovea_shape = np.array(self.fovea_shape)
        # floor division keeps the corner on integer pixel coordinates
        fovea_corner = (np.array(self.frame_shape) - fovea_shape) // 2
        assert all(fovea_corner >= 0)
        fovea_corners = np.array([fovea_corner], dtype='int32')
    else:
        # number of disparity values at memory resolution; floor division
        # because it is used as a slice index below
        mem_values = self.values // self.mem_step

        # a) Transform disparity from previous frame and calculate importance
        if self.disparity_memory.n > 0:
            # in current frame coords
            prior_disparity = self.disparity_memory.transforms[0]
        else:
            # TODO: we don't have GPS for multiview, so we're temporarily
            # replacing past estimate with coarse estimate from current frame
            params = {
                'data_weight': 0.16145115747533928,
                'disc_max': 294.1504935618425,
                'data_max': 32.024780646200725,
                'laplacian_ksize': 3,
            }
            prior_disparity = coarse_bp(frame,
                                        down_factor=self.mem_down_factor,
                                        iters=5,
                                        values=self.values,
                                        **params)
            prior_disparity *= self.frame_step
            prior_disparity = prior_disparity[:, mem_values:]

        importance = self._uc.get_importance(prior_disparity)

        # b) Transform uncertainty from previous frame and multiply by
        #    importance
        if self.use_uncertainty:
            uncertainty = np.ones_like(importance)
            if len(self.uncertainty_memory.transforms) > 0:
                uncertainty = self.uncertainty_memory.transforms[0]
            cost = importance * uncertainty
        else:
            cost = importance

        assert cost.shape == self.memory_shape

        # c) Find region of highest cost and put fovea there
        # mem_fovea_shape = np.array(self.fovea_shape) / self.mem_step
        # fovea_corner = _choose_fovea(cost, mem_fovea_shape, 0)
        # fovea_corner = np.array(fovea_corner) * self.mem_step + np.array([0, self.values])
        mem_fovea_shape = np.array(self.fovea_shape) // self.mem_step
        fovea_corners, mem_fovea_shape = _choose_foveas(
            cost, mem_fovea_shape, mem_values, self.max_n_foveas)

        # ### debug plot
        # print(fovea_corners)
        # print(mem_fovea_shape)
        # plt.imshow(cost, vmin=0, vmax=128/self.mem_step)
        # for (fi, fj) in fovea_corners:
        #     fm, fn = mem_fovea_shape
        #     plt.plot([fj, fj+fn, fj+fn, fj, fj], [fi, fi, fi+fm, fi+fm, fi], 'white')
        # plt.colorbar()
        # plt.show()
        # ###

        # rescale shape and corners and trim fovea to image ...
        # this isn't redundant because _choose_foveas chooses the
        # multifovea shape
        fovea_shape = np.array(mem_fovea_shape) * self.mem_step
        fovea_corners = np.array(fovea_corners, dtype='int32')
        for i in range(len(fovea_corners)):
            # memory coordinates -> frame coordinates (columns are offset
            # by self.values)
            fovea_corners[i] = (np.array(fovea_corners[i]) * self.mem_step
                                + np.array([0, self.values]))

            # fovea corner can be slightly too large, due to rounding errors
            fovea_max = np.array(self.frame_shape) - fovea_shape
            fovea_corners[i] = np.minimum(fovea_corners[i], fovea_max)
            assert (fovea_corners[i][0] >= 0
                    and fovea_corners[i][1] >= self.values)
            assert all(fovea_corners[i] + fovea_shape <= self.frame_shape)

    if self.verbose:
        print('choose time: ' + str(time.time() - start_time))

    # 2. Calculate disparity and store in memory:
    if len(self.fovea_memory.transforms) == 0:
        seed = np.zeros((0, 0), dtype='uint8')
    else:
        seed = np.zeros(self.frame_shape, dtype='uint8')
        for t in self.fovea_memory.transforms:
            seed += t

    if self.verbose:
        print('seed time: ' + str(time.time() - start_time))

    # --- fovea boundaries in frame coordinates ...
    bp_time = time.time()
    disp = foveal_bp(frame, fovea_corners, fovea_shape, seed,
                     values=self.values, **self.params)
    self.bp_time = time.time() - bp_time

    # disp = coarse_bp(frame, down_factor=1, iters=3, values=self.values, **self.params)
    # disp = cv2.pyrUp(disp)[:self.frame_shape[0], :self.frame_shape[1]]

    # keep all disparities in full image coordinates
    disp *= self.frame_step

    if self.verbose:
        print('BP time: ' + str(time.time() - start_time))

    # --- downsample and remember disparity
    downsampled = downsample(disp[:, self.values:], self.mem_down_factor)
    assert downsampled.shape == self.memory_shape
    self.disparity_memory.remember(pos, downsampled)

    if self.n_past_fovea > 0:
        # TODO: use multiple fovea_corners in history; for now only the
        # first fovea is remembered
        self.fovea_memory.remember(pos, disp, fovea_corner=fovea_corners[0])

    # 3. Calculate uncertainty and store in memory
    if self.use_uncertainty and len(self.disparity_memory.transforms) > 0:
        prior_disparity = self.disparity_memory.transforms[0]
        uncertainty = np.abs(downsampled - prior_disparity)
        self.uncertainty_memory.remember(pos, uncertainty)

    if self.verbose:
        print('finish time: ' + str(time.time() - start_time))

    return disp, fovea_corners
def __init__(self, average_disparity, frame_down_factor, mem_down_factor,
             fovea_shape, frame_shape, values, verbose=False,
             memory_length=1, max_n_foveas=1, **bp_args):
    """Store the configuration and create the memories used per frame.

    Arguments
    ---------
    average_disparity - full resolution mean-disparity image
    frame_down_factor - number of times incoming frame has already been
        downsampled
    mem_down_factor - number of additional times that memory is downsampled
    fovea_shape - at full resolution
    frame_shape - stored as-is and used as the shape of the fovea memory
    values - depth of disparity volume at full resolution
    verbose - stored on the instance as ``self.verbose``
    memory_length - ``n`` passed to the disparity and uncertainty memories
    max_n_foveas - stored on the instance as ``self.max_n_foveas``
    bp_args - extra keyword arguments that override the default BP
        parameters in ``self.params``
    """
    # --- fixed switches
    self.verbose = verbose
    self.use_uncertainty = False
    self.n_past_fovea = 0

    # --- geometry
    self.frame_shape = frame_shape
    self.fovea_shape = fovea_shape
    self.values = values
    self.max_n_foveas = max_n_foveas

    # --- resolution bookkeeping
    # self.frame_down_factor = frame_down_factor
    self.mem_down_factor = mem_down_factor
    self.frame_step = 2**frame_down_factor
    # step size for uncertainty and importance calculations (pixels)
    self.mem_step = 2**mem_down_factor

    # average disparity at memory resolution; its shape defines the memory
    self.average_disparity = downsample(average_disparity,
                                        down_factor=mem_down_factor)
    mem_shape = self.average_disparity.shape
    self.memory_shape = mem_shape

    # --- BP parameters
    # Parameter sets tried previously, kept for reference:
    # self.params = {
    #     'data_weight': 0.16145115747533928, 'disc_max': 294.1504935618425,
    #     'data_max': 32.024780646200725, 'laplacian_ksize': 3}  # original hyperopt
    # self.params = {
    #     'data_weight': 0.15109941436798274, 'disc_max': 44.43671813879002,
    #     'data_max': 68.407170602610137, 'laplacian_ksize': 5}  # hyperopt on 100 images
    # self.params = {
    #     'data_weight': 0.2715404479972163, 'disc_max': 2.603682635476145,
    #     'data_max': 156312.43116792402, 'laplacian_ksize': 3}  # Bryan's hyperopt on 250 images
    # self.params = {
    #     'data_weight': 1.2, 'disc_max': 924.0,
    #     'data_max': 189.0, 'laplacian_ksize': 5}  # random
    # self.params = {
    #     'data_weight': 0.16145115747533928, 'disc_max': 294.1504935618425,
    #     'data_max': 32.024780646200725, 'laplacian_ksize': 3}  # coarse
    bp_params = {
        'data_exp': 1.09821084614,
        'data_max': 112.191597317,
        'data_weight': 0.0139569211273,
        'disc_max': 12.1301410452,
        'laplacian_ksize': 3,
        'smooth': 1.84510833504e-07,
    }
    # self.params = {
    #     'data_exp': 14.2348581842, 'data_max': 79101007093.4,
    #     'data_weight': 0.000102496570364, 'disc_max': 4.93508276126,
    #     'laplacian_ksize': 5, 'laplacian_scale': 0.38937704644,
    #     'smooth': 0.00146126755993}
    # optimized for frame_down: 1, mem_down: 2, fovea_levels: 1

    # caller-supplied keyword arguments take precedence over the defaults
    bp_params.update(bp_args)
    self.params = bp_params

    # --- memories
    self.disparity_memory = DisparityMemory(mem_shape, n=memory_length)
    self.uncertainty_memory = DisparityMemory(mem_shape, n=memory_length)
    self.fovea_memory = DisparityMemory(frame_shape,
                                        fovea_shape=fovea_shape,
                                        n=self.n_past_fovea)

    self._uc = UnusuallyClose(self.average_disparity)
def rationale():
    """
    Plot the figure that motivates the fovea approach.

    For each iteration count, BP is evaluated on every frame at a coarse
    down factor (frame_down_factor + 1) and at a fine one (frame_down_factor),
    and the mean runtime is plotted against the mean cost. The point is to
    show diminishing returns from further iterations at a given down factor,
    but a possibly too-high cost of using a lower down factor.
    """
    # n_frames = 194
    n_frames = 50
    frame_down_factor = 1

    # shapes are taken from the first multi-view source
    first_source = KittiMultiViewSource(0)
    full_shape = first_source.frame_ten[0].shape
    frame_shape = downsample(first_source.frame_ten[0],
                             frame_down_factor).shape

    params = {
        'data_weight': 0.16145115747533928,
        'disc_max': 294.1504935618425,
        'data_max': 32.024780646200725,
        'laplacian_ksize': 3,
    }

    # iterations = [1, 2, 3]
    iterations = [1, 2, 3, 4, 5, 7, 10, 15]

    mean_coarse_times, mean_coarse_costs = [], []
    mean_fine_times, mean_fine_costs = [], []

    for n_iters in iterations:
        print(str(n_iters) + ' iterations')

        # one (time, cost) pair per frame for each resolution
        coarse_runs = []
        fine_runs = []

        for i in range(n_frames):
            print(' frame ' + str(i))
            source = KittiMultiViewSource(i)
            true_points = source.get_ground_truth_points(occluded=False)

            # coarse is evaluated before fine, for every frame
            coarse_time, coarse_cost = _evaluate_frame(
                source.frame_ten, true_points, frame_shape, full_shape,
                frame_down_factor + 1, frame_down_factor, n_iters, params)
            fine_time, fine_cost = _evaluate_frame(
                source.frame_ten, true_points, frame_shape, full_shape,
                frame_down_factor + 0, frame_down_factor, n_iters, params)

            coarse_runs.append((coarse_time, coarse_cost))
            fine_runs.append((fine_time, fine_cost))

        mean_coarse_times.append(np.mean([t for t, _ in coarse_runs]))
        mean_coarse_costs.append(np.mean([c for _, c in coarse_runs]))
        mean_fine_times.append(np.mean([t for t, _ in fine_runs]))
        mean_fine_costs.append(np.mean([c for _, c in fine_runs]))

    for series in (mean_coarse_times, mean_coarse_costs,
                   mean_fine_times, mean_fine_costs):
        print(series)

    # squares: coarse resolution, circles: fine resolution
    plt.plot(mean_coarse_times, mean_coarse_costs,
             color='k', marker='s', markersize=12)
    plt.plot(mean_fine_times, mean_fine_costs,
             color='k', marker='o', markersize=12)
    plt.xlabel('Runtime (s)', fontsize=18)
    plt.ylabel('Mean absolute disparity error (pixels)', fontsize=18)
    plt.gca().tick_params(labelsize='18')
    plt.show()
# Compare coarse (down_factor=1) and fine (down_factor=0) BP on a single
# stereo frame, timing each run with tic()/toc().
idrive = 51
iframe = 50
frame = load_stereo_frame(idrive, iframe)
points = load_disparity_points(idrive, iframe)

full_values = 128
frame_down_factor = 1
# Floor division: the result is passed to coarse_bp as values= and must stay
# an integer; "/" would give a float under true division.
values = full_values // 2**frame_down_factor

iters = 3
params = {
    'data_weight': 0.16145115747533928,
    'disc_max': 294.1504935618425,
    'data_max': 32.024780646200725,
    'ksize': 3,
}

# downsample both images of the stereo pair to the working resolution
frame = (downsample(frame[0], frame_down_factor),
         downsample(frame[1], frame_down_factor))

tic()
coarse_disp = coarse_bp(frame, values=values, down_factor=1, iters=iters,
                        **params)
# rescale disparities by the frame downsampling step
coarse_disp *= 2**frame_down_factor
toc()

tic()
fine_disp = coarse_bp(frame, values=values, down_factor=0, iters=iters,
                      **params)
fine_disp *= 2**frame_down_factor
toc()

# fovea_corners = (60, 360)
# fovea_shapes = (80, 80)
fovea_corners = [(60, 160), (60, 360)]
############################################### # source = KittiSource(51, 5) # source = KittiSource(51, 30) # source = KittiSource(51, 100) # source = KittiSource(51, 249) source = KittiSource(51, 10) # source = KittiSource(91, None) frame_down_factor = 1 mem_down_factor = 2 # relative to the frame down factor coarse_down_factor = 2 # for the coarse comparison full_values = 128 frame_shape = downsample(source.video[0][0], frame_down_factor).shape # fovea_shape = np.array(frame_shape)/4 # fovea_shape = (0, 0) fovea_shape = (80, 80) # fovea_shape = (120, 120) average_disp = downsample(get_average_disparity(source.ground_truth), frame_down_factor) values = frame_shape[1] - average_disp.shape[1] filter = Filter(average_disp, frame_down_factor, mem_down_factor, fovea_shape, frame_shape, values, verbose=False,