def __init__(self, ysize, xsize):
    self.g = maxflow.GraphFloat()
    self.nodeids = self.g.add_grid_nodes((ysize, xsize))
    self.ysize = ysize
    self.xsize = xsize
def score(self, idx, state):
    _, h, w = state.shape
    graph = maxflow.GraphFloat(h * w)
    nodes = graph.add_grid_nodes((h, w))
    # Channels 0 and 1 hold the unary (region) terms.
    regions = state[:2]
    f = regions[0]
    b = regions[1]
    graph.add_grid_tedges(nodes, b, f)
    # Channels 2 and 3 hold the pairwise (boundary) terms for the
    # horizontal and vertical neighbors, respectively.
    boundaries = state[2:]
    x = boundaries[0, :, 1:]
    y = boundaries[1, 1:]
    graph.add_grid_edges(nodes[:, 1:], x, [[0, 0, 0], [0, 0, 1], [0, 0, 0]], True)
    graph.add_grid_edges(nodes[1:], y, [[0, 0, 0], [0, 0, 0], [0, 1, 0]], True)
    graph.maxflow()
    seg = graph.get_grid_segments(nodes)
    gt_name = self.getIMName(os.path.join(self.gt_path, self.states[idx]['impath']))
    gt = np.array(Image.open(gt_name).resize((224, 224), 0)) == 255
    # Intersection over union between the min-cut segmentation and the ground truth.
    return (gt & seg).sum() / (gt | seg).sum()
def GrabCut(img, trimap):
    D = build_energy_function(img, trimap)
    flattened_img = img.reshape((-1, 3))
    l, m, _ = img.shape
    D = np.reshape(D, (2, l, m))
    # Beta is derived from the expected squared color difference between pixels.
    # Note: this exhaustive O((l*m)^2) loop is very slow on real images.
    Beta = 0.0
    for i in range(l * m):
        for j in range(l * m):
            if i == j:
                continue
            diff = flattened_img[i] - flattened_img[j]
            Beta += np.sum(diff * diff)
    Beta = Beta / (l * m)
    Beta = 1 / (2 * Beta)
    pixel_indices = np.reshape(np.arange(0, l * m, 1), (l, m))
    gr = maxflow.GraphFloat()
    nodes = gr.add_nodes(l * m)
    # Pairwise edges over the 8-neighborhoods of every other pixel;
    # K tracks the largest total capacity leaving a single pixel.
    K = 0
    for i in range(1, l, 2):
        for j in range(1, m, 2):
            current_index = i * m + j
            neighbors = pixel_indices[max(0, i - 1):i + 2, max(0, j - 1):j + 2]
            curr_K = 0
            for neighbor_index in np.nditer(neighbors):
                if neighbor_index == current_index:
                    continue
                diff = flattened_img[current_index] - flattened_img[neighbor_index]
                # Standard GrabCut weight: 50 * exp(-Beta * ||z_i - z_j||^2).
                # (The original used np.sum(diff), i.e. the unsquared difference.)
                capacity = 50 * np.exp(-1 * Beta * np.sum(diff * diff))
                gr.add_edge(nodes[current_index], nodes[neighbor_index],
                            capacity, capacity)
                curr_K += capacity
            K = max(K, curr_K)
    K = K + 1
    # Terminal edges: hard constraints for trimap seeds, data term otherwise.
    for i in range(l):
        for j in range(m):
            current_index = i * m + j
            if trimap[i][j] == 0:  # (i, j) is BG.
                cap_src = 0
                cap_dst = K
            elif trimap[i][j] == 1:  # (i, j) is FG.
                cap_src = K
                cap_dst = 0
            else:
                cap_src = D[0, i, j]
                cap_dst = D[1, i, j]
            gr.add_tedge(nodes[current_index], cap_src, cap_dst)
    gr.maxflow()
    sgm = gr.get_grid_segments(nodes)
    return sgm * 1.0
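
# The exhaustive Beta loop above is quadratic in the pixel count. In the
# GrabCut paper, beta is estimated from *neighboring* pixel pairs only; a
# vectorized sketch of that cheaper estimate (an alternative to the loop
# above, not a transcription of it):
import numpy as np

def estimate_beta(img):
    """Beta = 1 / (2 * <||z_i - z_j||^2>) over horizontally and vertically
    adjacent pixel pairs (GrabCut-style estimate, vectorized)."""
    img = img.astype(np.float64)
    dx = img[:, 1:] - img[:, :-1]   # horizontal neighbor differences
    dy = img[1:, :] - img[:-1, :]   # vertical neighbor differences
    sq = (dx ** 2).sum() + (dy ** 2).sum()
    n_pairs = dx.shape[0] * dx.shape[1] + dy.shape[0] * dy.shape[1]
    return 1.0 / (2.0 * sq / n_pairs)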
def applyPyMaxflow(self):
    g = maxflow.GraphFloat()
    nodes = g.add_nodes(len(self.nodes) - 2)  # all nodes except the two terminals
    visitedPairOfNodes = []
    tedges = [[] for i in range(len(self.nodes) - 2)]
    for edge in self.edges:
        # Edges touching the foreground/background terminals become t-links.
        if (edge.origin == self.foregroundNodeIndex
                or edge.origin == self.backgroundNodeIndex
                or edge.destination == self.foregroundNodeIndex
                or edge.destination == self.backgroundNodeIndex):
            if (edge.origin == self.foregroundNodeIndex
                    or edge.origin == self.backgroundNodeIndex):
                tedges[edge.destination].append(edge)
            continue
        # Regular n-links: add each undirected pair only once.
        if ((edge.origin, edge.destination) not in visitedPairOfNodes
                and (edge.destination, edge.origin) not in visitedPairOfNodes):
            visitedPairOfNodes.append((edge.origin, edge.destination))
            g.add_edge(edge.origin, edge.destination, edge.weight, edge.weight)
    for edges in tedges:
        if edges[0].origin == self.foregroundNodeIndex:
            foregroundEdge = edges[0]
            backgroundEdge = edges[1]
        else:
            foregroundEdge = edges[1]
            backgroundEdge = edges[0]
        g.add_tedge(foregroundEdge.destination, backgroundEdge.weight,
                    foregroundEdge.weight)
    flow = g.maxflow()
    finalForegroundListMask = g.get_grid_segments(nodes)
    # Reshape the flat per-node segment list into a 2-D mask.
    finalForegroundMask = np.zeros(self.image.shape[:2], dtype=bool)
    for y in range(self.image.shape[0]):
        finalForegroundMask[y, :] = finalForegroundListMask[
            y * self.image.shape[1]:(y + 1) * self.image.shape[1]]
    self.foregroundImage = np.copy(self.image)
    self.foregroundImage[~finalForegroundMask] = 0
    self.backgroundImage = np.copy(self.image)
    self.backgroundImage[finalForegroundMask] = 0
def func(M, I, mix):
    G, indices, S, T = None, None, None, None
    mix1 = mix
    mix2 = 1 - mix
    nrow, ncol = I.shape[0], I.shape[1]
    # Build the index of masked pixels.
    i_mask, j_mask = np.where(M == 0)
    # Initialize the graph information.
    s, t, w = [], [], []
    # Scanning is row-by-row here; the MATLAB version scans column-by-column.
    indices = np.column_stack((i_mask, j_mask))
    indices_len = len(indices)
    K0 = np.zeros((nrow, ncol))
    # Build the index matrix (row-major here); the matrix makes it easy to
    # look up a pixel's neighbors.
    for k in range(indices_len):
        K0[indices[k][0], indices[k][1]] = k
    # Initialize the terminal nodes.
    # Note: S and T end up identical here; kept as in the original.
    S = indices_len + 1
    T = indices_len + 1
    # Build the graph G.
    G = maxflow.GraphFloat()
    node_idx = G.add_grid_nodes(len(indices))
    for k in range(indices_len):
        x, y = indices[k][0], indices[k][1]
        if x < nrow - 1:
            if M[x + 1, y] == 0:
                w = I[x + 1, y]
                G.add_edge(k, int(K0[x + 1, y]), w, w)  # K0 holds floats; cast for node ids
                # s.append(k)
                # t.append(K0[x+1,y])
                # # The weights differ from the MATLAB version because the
                # # grayscale formula differs.
                # w.append(mix2*I[x+1,y])
        if y < ncol - 1:
            if M[x, y + 1] == 0:
                w = I[x, y + 1]
                G.add_edge(k, int(K0[x, y + 1]), w, w)
                # s.append(k)
                # t.append(K0[x,y+1])
                # w.append(mix2*I[x,y+1])
    return G, indices, S, T
def compute_labels(self, seed_mask):
    num_rows = self.num_rows
    num_cols = self.num_cols
    self.img = self.pres.get_topmost_img()
    g = maxflow.GraphFloat()
    nodeids = g.add_grid_nodes((num_rows, num_cols))
    structure_x = np.array([[0, 0, 0],
                            [0, 0, 1],
                            [0, 0, 0]])
    structure_y = np.array([[0, 0, 0],
                            [0, 0, 0],
                            [0, 1, 0]])
    # calculate weights of edges (see the create_weight_map sketch below)
    n_right, n_below, t_sink, t_source = self.create_weight_map(seed_mask)
    # add weights and structure to graph
    g.add_grid_edges(nodeids, weights=n_right, structure=structure_x, symmetric=True)
    g.add_grid_edges(nodeids, weights=n_below, structure=structure_y, symmetric=True)
    # add t-links
    g.add_grid_tedges(nodeids, t_source, t_sink)
    # run maxflow algorithm
    g.maxflow()
    # get algorithm results
    segments_truth_table = g.get_grid_segments(nodeids)
    label_mask = np.where(segments_truth_table, self.obj_value, self.bgr_value)
    return label_mask

# class GraphCutsSegmentation(Segmentation):
#     def __init__(self, imgs):
#         super().__init__(imgs)
#
#     def compute_labels(self, seed_mask):
#         return label_mask
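
# create_weight_map is defined elsewhere in that class; a minimal sketch of
# one common choice of weights (Boykov-Jolly-style Gaussian contrast n-links
# and large-capacity seed t-links). The sigma value, a grayscale self.img,
# and the seed encoding (1 = object, 2 = background) are assumptions made
# purely for illustration:
import numpy as np

def create_weight_map(self, seed_mask):
    img = self.img.astype(np.float64)
    sigma = 10.0  # assumed contrast scale
    # n-link weights fall off with the local intensity contrast.
    n_right = np.zeros_like(img)
    n_right[:, :-1] = np.exp(-(img[:, :-1] - img[:, 1:]) ** 2 / (2 * sigma ** 2))
    n_below = np.zeros_like(img)
    n_below[:-1, :] = np.exp(-(img[:-1, :] - img[1:, :]) ** 2 / (2 * sigma ** 2))
    # t-links: seeds get a capacity larger than any possible cut.
    K = 1.0 + n_right.sum() + n_below.sum()
    t_source = np.where(seed_mask == 1, K, 0.0)  # object seeds
    t_sink = np.where(seed_mask == 2, K, 0.0)    # background seeds
    return n_right, n_below, t_sink, t_source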
def GraphFloat(fc_img):
    g = maxflow.GraphFloat()
    nodeids = g.add_grid_nodes(fc_img.shape)
    # 8-connected grid; per-node capacities taken from fc_img.
    structure = np.array([[1, 1, 1],
                          [1, 0, 1],
                          [1, 1, 1]])
    g.add_grid_edges(nodeids, fc_img, structure=structure, symmetric=True)
    # Tie the left-most column to the source and the right-most column to the
    # sink with infinite capacity, so the min cut separates left from right.
    left_most = np.concatenate(
        (np.arange(fc_img.shape[0]).reshape(1, fc_img.shape[0]),
         np.zeros((1, fc_img.shape[0])))).astype(np.uint64)
    left_most = np.ravel_multi_index(left_most, fc_img.shape)
    g.add_grid_tedges(left_most, np.inf, 0)
    right_most = np.concatenate(
        (np.arange(fc_img.shape[0]).reshape(1, fc_img.shape[0]),
         np.ones((1, fc_img.shape[0])) * (np.size(fc_img, 1) - 1))).astype(np.uint64)
    right_most = np.ravel_multi_index(right_most, fc_img.shape)
    g.add_grid_tedges(right_most, 0, np.inf)
    x = g.maxflow()
    return x
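
# Hypothetical usage of the function above (the cost image is made up): the
# returned flow equals the cost of the cheapest cut separating the left
# column from the right column.
import numpy as np

cost_img = np.array([[9.0, 1.0, 9.0],
                     [9.0, 1.0, 9.0],
                     [9.0, 1.0, 9.0]])
# Low-cost middle column -> the min cut should pass through it.
print(GraphFloat(cost_img))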
def graphcut(image, fore, back, lam, sig):
    image = np.float64(image) / 255
    h, w, _ = image.shape
    graph = maxflow.GraphFloat(h * w)
    nodes = graph.add_grid_nodes((h, w))
    # Color differences between horizontal and vertical neighbors.
    dx = image[:, 1:] - image[:, :-1]
    dy = image[1:] - image[:-1]
    struct_left = [[0, 0, 0], [0, 0, lam], [0, 0, 0]]
    struct_down = [[0, 0, 0], [0, 0, 0], [0, lam, 0]]
    nx = n_weight(dx, sig)
    ny = n_weight(dy, sig)
    graph.add_grid_edges(nodes[:, 1:], nx, struct_left, True)
    graph.add_grid_edges(nodes[1:], ny, struct_down, True)
    # Kernel density estimates of the foreground/background color models,
    # evaluated with the fast Gauss transform (figtree).
    fseeds = image[fore]
    bseeds = image[back]
    flat_img = image.reshape(-1, 3)
    f_weights = figtree(fseeds, flat_img, np.ones(fseeds.shape[0]), sig,
                        eval="direct").reshape(h, w)
    b_weights = figtree(bseeds, flat_img, np.ones(bseeds.shape[0]), sig,
                        eval="direct").reshape(h, w)
    t_weights = f_weights + b_weights
    f = f_weights / t_weights
    b = b_weights / t_weights
    f[fore] = 1
    b[back] = 1
    graph.add_grid_tedges(nodes, b, f)
    graph.maxflow()
    return graph.get_grid_segments(nodes)
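
# n_weight is not shown in this snippet; a plausible definition, given that
# it is called on per-pixel color differences (an assumption on my part), is
# the usual Gaussian contrast weight:
import numpy as np

def n_weight(diff, sig):
    # diff: (H, W, 3) array of neighbor color differences.
    # Returns one weight per pixel pair: exp(-||diff||^2 / (2 * sig^2)).
    return np.exp(-np.sum(diff ** 2, axis=-1) / (2 * sig ** 2))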
def constructGraph(self):
    self.BG_prob = self.BG_GMM.score_samples(self._flatten_data).reshape(self.row, self.col)
    self.FG_prob = self.FG_GMM.score_samples(self._flatten_data).reshape(self.row, self.col)
    self.graph = maxflow.GraphFloat()
    nodeids = self.graph.add_grid_nodes((self.row, self.col))
    for y in range(self.row):
        for x in range(self.col):
            ## assign data term ##
            if self._mask[y, x] == self.GC_PR_BG or self._mask[y, x] == self.GC_PR_FG:
                fromSource = -self.BG_prob[y, x]
                toSink = -self.FG_prob[y, x]
            elif self._mask[y, x] == self.GC_BG:
                fromSource = 0
                toSink = self.lamb
            else:  # FG
                fromSource = self.lamb
                toSink = 0
            self.graph.add_tedge(nodeids[y, x], fromSource, toSink)
            ## assign smooth term (see the weight sketch below) ##
            if x > 0:  # left term exists
                w = self.h_weight[y, x]
                self.graph.add_edge(nodeids[y, x], nodeids[y, x-1], w, w)
            if y > 0:  # upper term exists
                w = self.v_weight[y, x]
                self.graph.add_edge(nodeids[y, x], nodeids[y-1, x], w, w)
            if x > 0 and y > 0:  # upper left term exists
                w = self.n_weight[y, x]
                self.graph.add_edge(nodeids[y-1, x-1], nodeids[y, x], w, w)
            if x < self.col - 1 and y > 0:  # upper right term exists
                w = self.p_weight[y, x]
                self.graph.add_edge(nodeids[y-1, x+1], nodeids[y, x], w, w)
    print('graph construction end...')
    print('maxflow: {}'.format(self.graph.maxflow()))
    self.nodeids = nodeids
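
# The four directional weight maps (h_weight, v_weight, n_weight, p_weight)
# are computed elsewhere in that class; a sketch of the standard GrabCut-style
# smoothness weights they presumably hold. The method name, gamma = 50, the
# per-direction beta estimate, and a self._img attribute are assumptions:
import numpy as np

def computeSmoothWeights(self, gamma=50.0):
    img = self._img.astype(np.float64)  # assumed (rows, cols, 3) image

    def weight(diff):
        # diff: color differences to one neighbor, shape (..., 3).
        sq = np.sum(diff ** 2, axis=-1)
        beta = 1.0 / (2.0 * sq.mean())  # GrabCut's beta estimate
        return gamma * np.exp(-beta * sq)

    rows, cols = img.shape[:2]
    self.h_weight = np.zeros((rows, cols))  # to the left neighbor
    self.h_weight[:, 1:] = weight(img[:, 1:] - img[:, :-1])
    self.v_weight = np.zeros((rows, cols))  # to the upper neighbor
    self.v_weight[1:, :] = weight(img[1:, :] - img[:-1, :])
    self.n_weight = np.zeros((rows, cols))  # to the upper-left neighbor
    self.n_weight[1:, 1:] = weight(img[1:, 1:] - img[:-1, :-1])
    self.p_weight = np.zeros((rows, cols))  # to the upper-right neighbor
    self.p_weight[1:, :-1] = weight(img[1:, :-1] - img[:-1, 1:])
    # Diagonal weights are often further divided by sqrt(2) for distance.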
def min_cut(voxel_code_array, pole_position_array, intensity_array, point_counts_array):
    """
    segment the voxels into n segments based on the position array using the
    min cut algorithm

    the original min cut algorithm comes from the paper:
    "An Experimental Comparison of Min-Cut/Max-Flow Algorithms for Energy
    Minimization in Vision." Yuri Boykov and Vladimir Kolmogorov. In IEEE
    Transactions on Pattern Analysis and Machine Intelligence (PAMI),
    September 2004

    Args:
        voxel_code_array: the voxels to be segmented
        pole_position_array: the position labels of the voxels; there can be
            more than 2 labels
        intensity_array: per-voxel intensities
        point_counts_array: per-voxel point counts
    Return:
    """
    # K in the article
    k = 3.40282e+038
    voxel_length = len(voxel_code_array)
    reached_flag = [False] * voxel_length
    voxel_count = 0
    intensity_var = np.var(np.vectorize(int)(intensity_array))
    point_count_var = np.var(np.vectorize(int)(point_counts_array))
    g = maxflow.GraphFloat()
    nodes = g.add_nodes(voxel_length)
    positions = pole_position_array[pole_position_array != '0']
    unique_positions = list(set(positions))
    first_positions = voxel_code_array[pole_position_array == unique_positions[0]]
    second_positions = voxel_code_array[pole_position_array == unique_positions[1]]
    # Voxel codes pack the x/y/z cell coordinates as three 4-digit fields.
    center_x1 = int(first_positions[0][0:4])
    center_y1 = int(first_positions[0][4:8])
    center_x2 = int(second_positions[0][0:4])
    center_y2 = int(second_positions[0][4:8])
    distance = ((center_x1 - center_x2)**2 + (center_y1 - center_y2)**2)**0.5
    while voxel_count < voxel_length:
        center_x = int(voxel_code_array[voxel_count][0:4])
        center_y = int(voxel_code_array[voxel_count][4:8])
        foot_x, foot_y = get_foot_point(center_x1, center_y1, center_x2, center_y2,
                                        center_x, center_y)
        # d1 = ((center_x - center_x1)**2 + (center_y - center_y1)**2)**0.5
        # d2 = ((center_x - center_x2)**2 + (center_y - center_y2)**2)**0.5
        d1 = ((foot_x - center_x1)**2 + (foot_y - center_y1)**2)**0.5
        d2 = ((foot_x - center_x2)**2 + (foot_y - center_y2)**2)**0.5
        v = (foot_x - center_x1) * (foot_x - center_x2) + \
            (foot_y - center_y1) * (foot_y - center_y2)
        if voxel_code_array[voxel_count] in first_positions:
            g.add_tedge(nodes[voxel_count], k, 0)
        elif voxel_code_array[voxel_count] in second_positions:
            g.add_tedge(nodes[voxel_count], 0, k)
        elif v >= 0:
            # The foot point lies outside the segment between the two poles.
            if d1 > d2:
                g.add_tedge(nodes[voxel_count], 0, k)
            else:
                g.add_tedge(nodes[voxel_count], k, 0)
        else:
            g.add_tedge(nodes[voxel_count], -math.log(d1 / distance) * 0.02,
                        -math.log(d2 / distance) * 0.02)
        reached_flag[voxel_count] = True
        # Codes of the six face-adjacent neighbor voxels.
        right = "{:0>4d}".format(int(voxel_code_array[voxel_count][0:4]) + 1) + \
            voxel_code_array[voxel_count][4:12]
        left = "{:0>4d}".format(int(voxel_code_array[voxel_count][0:4]) - 1) + \
            voxel_code_array[voxel_count][4:12]
        front = voxel_code_array[voxel_count][0:4] + \
            "{:0>4d}".format(int(voxel_code_array[voxel_count][4:8]) + 1) + \
            voxel_code_array[voxel_count][8:12]
        back = voxel_code_array[voxel_count][0:4] + \
            "{:0>4d}".format(int(voxel_code_array[voxel_count][4:8]) - 1) + \
            voxel_code_array[voxel_count][8:12]
        up = voxel_code_array[voxel_count][0:8] + \
            "{:0>4d}".format(int(voxel_code_array[voxel_count][8:12]) + 1)
        down = voxel_code_array[voxel_count][0:8] + \
            "{:0>4d}".format(int(voxel_code_array[voxel_count][8:12]) - 1)
        neighbor_list = [right, left, front, back, up, down]
        for neighbor in neighbor_list:
            indice = np.where(np.array(voxel_code_array) == neighbor)
            if len(indice[0]) == 0:
                continue
            indice = indice[0][0]
            if not reached_flag[indice]:
                intensity_dif = math.fabs(int(intensity_array[indice]) -
                                          int(intensity_array[voxel_count]))
                point_count_dif = math.fabs(int(point_counts_array[indice]) -
                                            int(point_counts_array[voxel_count]))
                # Smoothness cost from the point-count difference
                # (intensity_dif and intensity_var are computed but unused here).
                smoothcost = math.exp(-point_count_dif**2 / (2 * point_count_var))
                g.add_edge(nodes[voxel_count], nodes[indice], smoothcost, smoothcost)
        voxel_count += 1
    flow = g.maxflow()
    return [g.get_segment(node) for node in nodes]  # a list, so it works on Python 3 too
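
# get_foot_point is not shown in this snippet; a minimal sketch, assuming it
# returns the foot of the perpendicular from (x, y) onto the line through
# (x1, y1) and (x2, y2):
def get_foot_point(x1, y1, x2, y2, x, y):
    dx, dy = x2 - x1, y2 - y1
    # Parameter of the projection of (x, y) onto the line.
    t = ((x - x1) * dx + (y - y1) * dy) / float(dx * dx + dy * dy)
    return x1 + t * dx, y1 + t * dy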
def __init__(self, node_max=0, edge_max=0):
    # node_max/edge_max are preallocation estimates; they could be forwarded
    # as maxflow.GraphFloat(node_max, edge_max).
    self.graph = maxflow.GraphFloat()
    self.Econst = 0
import maxflow
import numpy as np

if __name__ == '__main__':
    g = maxflow.GraphFloat()
    node_ids = g.add_grid_nodes((3, 3))
    # Each node gets a directed edge to its right-hand neighbor.
    structure = np.array([[0, 0, 0],
                          [0, 0, 1],
                          [0, 0, 0]])
    weights = np.array([[1, 2, 1],
                        [4, 5, 1],
                        [7, 8, 1]])
    g.add_grid_edges(node_ids, weights=weights, structure=structure,
                     symmetric=False)
    g.maxflow()
    a = g.get_segment(8)  # segment of the bottom-right node
    print(a)
def graphCut(img, center, radius, temp, edge, count, editPoints, padList,
             theta_width, phi_width):
    """outputs two images. The first image shows the segmented object in
    white against a black background. The second image delineates the edge
    of the segmented image. Increase th_div and phi_div in their respective
    spinboxes for more accurate segmentation"""
    """Important note. The labeled image is referred to as temp, or self.temp
    in the interface. This stands for template. The previously labeled image
    is fed back into the graphcut"""

    """create polar images and cost arrays"""
    print("RUNNING GRAPHCUT!")
    img = padImage(img, padList)
    temp = padImage(temp, padList)
    edge = padImage(edge, padList)
    center = padCenter(center, padList)
    polar_img = img2polar(img, center, radius, theta_width=theta_width,
                          phi_width=phi_width)
    polar_grad, y, x = np.gradient(np.array(polar_img, dtype='float'))
    """Lockett 100416 replacement line below to not use gradient when the
    image has a surface label"""
    """polar_grad = -1 * np.array(polar_img, dtype='float')"""
    polar_cost = -1 * np.ones(polar_img.shape)
    for r in range(1, radius):
        polar_cost[r] = polar_grad[r] - polar_grad[r - 1]

    """
    flip the cost image upside down. This is so that the base set is at the
    bottom of the array. Since the graphcut cuts from top to bottom, this
    inversion is necessary.
    """
    polar_cost_inv = polar_cost[::-1, :, :]

    print("CONSTRUCTING GRAPH EDGES... ")
    """construct the graph using PyMaxFlow"""
    g = maxflow.GraphFloat()
    nodeids = g.add_grid_nodes(polar_img.shape)
    structure = np.zeros((3, 3, 3))
    structure[2] = np.array([[0, 10000, 0],
                             [10000, 10000, 10000],
                             [0, 10000, 0]])
    g.add_grid_edges(nodeids, structure=structure, symmetric=False)

    """convert the previously labeled image (temp) into a polar transform
    image. Take the labels and give them high cost edge weights so the
    segmentation avoids previously labeled objects"""
    polar_lbl_img = img2polar(temp, center, radius, theta_width=theta_width,
                              phi_width=phi_width)
    polar_lbl_img_inv = polar_lbl_img[::-1, :]
    lbl_caps = polar_lbl_img_inv > 0
    self_caps = (polar_lbl_img_inv == count)
    lbl_caps = lbl_caps & ~self_caps  # boolean arrays do not support "-="
    lbl_source_caps = np.zeros(lbl_caps.shape)
    lbl_sink_caps = lbl_caps * 10000
    g.add_grid_tedges(nodeids, lbl_source_caps, lbl_sink_caps)
    structure2 = 10000 * np.array([[0, 0, 0],
                                   [0, 0, 1],
                                   [0, 1, 0]])
    g.add_grid_edges(nodeids[radius - 1], structure=structure2, symmetric=True)

    """add terminal edges using two arrays whose elements are the costs of
    the edges from the source and to the sink"""
    print("CONSTRUCTING GRAPH TEDGES...")
    sinkcaps = polar_cost_inv * (polar_cost_inv >= 0)
    sourcecaps = -1 * polar_cost_inv * (polar_cost_inv < 0)
    g.add_grid_tedges(nodeids, sourcecaps, sinkcaps)

    """accounts for edit points. Takes every point in the edit point list,
    converts it to its spherical coordinate, and adds high cost edges in the
    column of that edit point

    inverts the x and y coordinates of the center"""
    center = np.array((center[0], center[2], center[1]))
    if len(editPoints) != 0:
        for coords in editPoints:
            rad = math.sqrt((center[0] - coords[0])**2 +
                            (center[1] - coords[2])**2 +
                            (center[2] - coords[1])**2)
            theta = math.atan2(center[2] - coords[1], coords[2] - center[1])
            print(str((coords[0] - center[0]) / (rad + 1)))
            phi = math.acos(float(coords[0] - center[0]) / (rad + 1))
            if theta < 0:
                theta = 2 * math.pi + theta
            theta = theta_width - theta_width * theta / (2 * math.pi) - 1
            phi = phi_width * phi / (math.pi) - 1
            rad = radius - rad
            print("POLAR COORDS: " + str((rad, theta, phi)))
            theta = int(theta)  # cast for use as array indices
            phi = int(phi)
            for r in range(0, radius):
                if r <= rad:
                    g.add_tedge(nodeids[r, theta, phi], 0, 10000)
                else:
                    g.add_tedge(nodeids[r, theta, phi], 10000, 0)

    print("CUTTING GRAPH...")
    g.maxflow()

    """s-t mincut of graph. This is converted to cartesian coordinates with
    the function img2cart. The images are also closed to eliminate spotty
    areas"""
    print("STARTING CARTESIAN TRANSFORM...")
    polar_img_seg = np.invert(g.get_grid_segments(nodeids)[::-1, :, :])
    edge_img = np.zeros(img.shape)
    seg_img = ndimage.binary_closing(
        img2cart(img, polar_img_seg, center, radius, theta_width, phi_width))

    """create an edge image of the segmented object"""
    strel = np.ones((3, 3, 3))
    erode_img = ndimage.binary_erosion(seg_img, strel)
    edge_img = np.logical_xor(seg_img, erode_img)

    """shears the segmentation image and edge if padding was applied"""
    """add the object back on to the template image (and the edge image back
    on the template edge). If there was an editpoint involved, remove the
    previous segmentation of that object and add back on the edited object"""
    if len(editPoints) != 0:
        del_img = (temp == count) * count
        temp -= del_img
        del_edge_img = (edge == count) * count
        edge -= del_edge_img
    temp += seg_img * count
    edge += edge_img * count
    temp = shearImage(temp, padList)
    edge = shearImage(edge, padList)
    print("FINISHED!")
    return temp, edge
import maxflow
import numpy as np
import matplotlib.pyplot as plt
import time

if __name__ == '__main__':
    print(maxflow.__version__)
    ## A two-node graph built edge by edge.
    g = maxflow.GraphFloat(2, 2)
    nodes = g.add_nodes(2)
    g.add_edge(nodes[0], nodes[1], 1, 2)
    g.add_tedge(nodes[0], 2, 5)
    g.add_tedge(nodes[1], 9, 4)
    flow = g.maxflow()
    print('maxflow: {}'.format(flow))
    for i in range(g.get_node_num()):
        print('seg of node {}: {}'.format(i, g.get_segment(i)))
    ## The same idea with the grid helpers.
    g = maxflow.GraphFloat()
    node_idxs = g.add_grid_nodes((1, 2))
    g.add_grid_edges(node_idxs, 50)
    # The original called add_grid_edges(node_idxs, 1, 3), which passes 3 as
    # the structure argument; terminal capacities were almost certainly meant.
    g.add_grid_tedges(node_idxs, 1, 3)
    g.maxflow()
    seg = g.get_grid_segments(node_idxs)
    print(seg)
    img = np.int_(np.logical_not(seg))
    plt.imshow(img)
    # plt.show(block=False)
def higo_baseline(experiment_state, data_adapter, cache_path, higo_settings):
    """
    Reimplementation of the Higo et al. 2009 paper
        Tomoaki Higo, Yasuyuki Matsushita, Neel Joshi, and Katsushi Ikeuchi
        A hand-held photometric stereo camera for 3-d modeling
        ICCV2009

    Uses the PyMaxFlow library for the graphcut problem:
        http://pmneila.github.io/PyMaxflow/
    This library is installed as a python package by the installEnv.sh script.
    """
    if not isinstance(experiment_state.locations, DepthMapParametrization):
        error("Higo et al. 2009 requires a depth map parametrization.")
    os.makedirs(cache_path, exist_ok=True)
    device = torch.device(general_settings.device_name)

    with torch.no_grad():
        step_size = higo_settings['step_size']
        step_radius = higo_settings['step_radius']
        depth_range = step_size * step_radius  # 2.5cm
        nr_steps = 2 * step_radius + 1
        eta = higo_settings['eta'] * general_settings.intensity_scale
        lambda_n = higo_settings['lambda_n']
        lambda_s = higo_settings['lambda_s'] * step_size * 1000
        lambda_1 = higo_settings['lambda_1']
        lambda_2 = higo_settings['lambda_2']
        surface_constraint_threshold = 0.005  # 5mm
        surface_constraint_threshold = surface_constraint_threshold / (depth_range / nr_steps)
        surface_constraint_penalization = 0.010  # 1cm

        ## 1) calculate the photometric loss volume, and the depth/normal hypothesis volume
        N_pixels = experiment_state.locations.get_point_count()
        photo_loss_volume = torch.zeros(N_pixels, nr_steps).to(device)
        depth_volume = torch.zeros(N_pixels, nr_steps, 1).to(device)
        normal_volume = torch.zeros(N_pixels, nr_steps, 3).to(device)
        diffuse_volume = torch.zeros(N_pixels, nr_steps, 3).to(device)

        # we need multiples of the step size for the graph cut later on
        initial_depth = experiment_state.locations.implied_depth_image().clone()
        initial_depth.div_(step_size).round_().mul_(step_size)

        for offset_idx in tqdm(range(nr_steps),
                               desc="Solving all RANSAC problems (photometric loss volume)"):
            depth_offset = -depth_range + offset_idx * step_size
            cache_file = os.path.join(cache_path, "%8.6f.npz" % depth_offset)
            depth = initial_depth + depth_offset
            if os.path.exists(cache_file):
                cached = np.load(cache_file)
                normals = to_torch(cached['normals'])
                inliers_N = to_torch(cached['inliers_N'])
                inlier_photometric_error = to_torch(cached['inlier_photometric_error'])
                albedo = to_torch(cached['diffuse'])
            else:
                spoofed_experiment_state = ExperimentState.copy(experiment_state)
                spoofed_experiment_state.locations = DepthMapParametrization()
                spoofed_experiment_state.locations.initialize(
                    depth,
                    experiment_state.locations.mask,
                    experiment_state.locations.invK,
                    experiment_state.locations.invRt,
                )
                normals, albedo, inliers, residuals = closed_form_lambertian_solution(
                    spoofed_experiment_state,
                    data_adapter,
                    sample_radius=0,
                    shadows_occlusions=False,
                    verbose=False)
                inlier_photometric_error = (residuals * inliers).abs().sum(dim=1).sum(dim=1)
                inliers_N = inliers.squeeze().sum(dim=1)
                np.savez_compressed(
                    cache_file,
                    normals=to_numpy(normals),
                    diffuse=to_numpy(albedo),
                    inliers_N=to_numpy(inliers_N),
                    inlier_photometric_error=to_numpy(inlier_photometric_error),
                )
            depth_volume[:, offset_idx, 0] = experiment_state.locations.create_vector(depth)
            normal_volume[:, offset_idx] = normals
            diffuse_volume[:, offset_idx] = albedo.squeeze()
            photo_loss_volume[:, offset_idx] = eta * inlier_photometric_error / inliers_N - inliers_N

        # precalculation of neighbour relationships
        mask = experiment_state.locations.mask
        py, px = torch.meshgrid([torch.arange(0, mask.shape[0]),
                                 torch.arange(0, mask.shape[1])])
        pixels = torch.stack((
            px[mask.squeeze()],
            py[mask.squeeze()],
        ), dim=0).to(device)
        indices = torch.zeros(*mask.shape[:2], 1).long().to(device) - 1
        indices[mask] = torch.arange(N_pixels).to(device)[:, None]
        indices = torch.nn.functional.pad(indices, pad=(0, 0, 1, 1, 1, 1), value=-1)
        neighbours = []
        for offset in [[-1, 0], [1, 0], [0, -1], [0, 1]]:
            offset_pixels = pixels + 1  # because of the padding
            for c in range(2):
                offset_pixels[c, :] += offset[c]
            offset_lin_indices = offset_pixels[0, :] + offset_pixels[1, :] * indices.shape[1]
            neighbours.append(indices.flatten()[offset_lin_indices])
        neighbours = torch.stack(neighbours, dim=1)

        surface_constrain_cachefile = os.path.join(cache_path, "surface_normal_constraint.npz")

        ## 2) calculate the surface normal constraint loss volume:
        if not os.path.exists(surface_constrain_cachefile):
            # we add in a nonsense 'neighbour' that will never be able to win, for implementational cleanliness
            surface_constraint_volume = torch.zeros(N_pixels, nr_steps).to(device)
            neighbours_n = neighbours.clone()
            neighbours_n[neighbours_n < 0] = N_pixels
            depth_volume_n = torch.cat((depth_volume, torch.zeros(1, nr_steps, 1).to(device)))
            normal_volume_n = torch.cat((normal_volume, torch.ones(1, nr_steps, 3).to(device)))
            pixel_locs = torch.cat((pixels.float(), torch.ones(1, pixels.shape[1]).to(device)), dim=0)
            pixel_locs_n = torch.cat((pixel_locs, torch.zeros(pixel_locs.shape[0], 1).to(device)), dim=1)
            for offset_idx in tqdm(range(nr_steps),
                                   desc="Generating the surface constraint loss volume"):
                hypothesis_points = experiment_state.locations.invK @ (
                    pixel_locs * depth_volume[None, :, offset_idx, 0])
                hypothesis_normals = normal_volume[:, offset_idx].transpose(0, 1)
                for n_idx in range(4):
                    these_neighbours = neighbours_n[:, n_idx]
                    n_pixel_locs = pixel_locs_n[:, these_neighbours]
                    best_label_points = torch.zeros(3, N_pixels).to(device)
                    best_label_normals = torch.zeros(3, N_pixels).to(device)
                    best_label_offsets = torch.zeros(N_pixels).to(device)
                    best_label_pdists = torch.zeros(N_pixels).to(device) + np.inf
                    for n_offset_idx in range(nr_steps):
                        n_hypothesis_points = experiment_state.locations.invK @ (
                            n_pixel_locs * depth_volume_n[None, these_neighbours, n_offset_idx, 0])
                        n_hypothesis_normals = normal_volume_n[these_neighbours, n_offset_idx].transpose(0, 1)
                        n_hypothesis_pdists = (
                            hypothesis_normals * (hypothesis_points - n_hypothesis_points)
                        ).abs().sum(dim=0)
                        better_matches = n_hypothesis_pdists < best_label_pdists
                        best_label_offsets[better_matches] = n_offset_idx
                        best_label_pdists[better_matches] = n_hypothesis_pdists[better_matches]
                        best_label_points[:, better_matches] = n_hypothesis_points[:, better_matches]
                        best_label_normals[:, better_matches] = n_hypothesis_normals[:, better_matches]
                    hypothesis_ldists = (best_label_offsets - offset_idx).abs()
                    valid_best_labels = hypothesis_ldists < surface_constraint_threshold
                    hypothesis_pdists = (
                        best_label_normals * (hypothesis_points - best_label_points)
                    ).abs().sum(dim=0)
                    # we don't have parallel depth planes, however!
                    surface_constraint_volume[valid_best_labels, offset_idx] += \
                        hypothesis_pdists[valid_best_labels]
                    surface_constraint_volume[valid_best_labels == False, offset_idx] = \
                        surface_constraint_penalization
            np.savez_compressed(
                surface_constrain_cachefile,
                surface_constraint_volume=to_numpy(surface_constraint_volume),
            )
        else:
            cached = np.load(surface_constrain_cachefile)
            surface_constraint_volume = to_torch(cached['surface_constraint_volume'])

        # at this point we can calculate the unary result, i.e. without TV-1 depth smoothness
        unary_loss_volume = photo_loss_volume + lambda_n * surface_constraint_volume
        winners_unary = torch.argmin(unary_loss_volume, dim=1)

        graphcut_cache_file = os.path.join(cache_path, "higo_graphcut.npz")
        if not os.path.exists(graphcut_cache_file):
            # 3) Graph-cut magic. TV-1 optimization on the depth
            # because we now have discretized depth values, there is only a finite number of labels to optimize over.
            # As such, we can use the graph construction from "Stereo Without Epipolar Lines: A Maximum-Flow Formulation"
            depth_min = depth_volume.min()
            depth_max = depth_volume.max()
            n_hyps = round((depth_max - depth_min).item() / step_size) + 1
            depth_hypotheses = depth_min + (depth_max - depth_min) * \
                torch.arange(n_hyps).float().to(device) / (n_hyps - 1)
            depth_hypotheses.div_(step_size).round_().mul_(step_size)

            # make it amenable to graphcut optimization, i.e. all positive values
            safe_unary_loss_volume = unary_loss_volume.clone()
            safe_unary_loss_volume = safe_unary_loss_volume - (safe_unary_loss_volume.min() - 1)
            # a value definitely higher than the optimal solution's loss
            cost_upper_bound = safe_unary_loss_volume.sum(dim=0).sum().item() + 1

            # create the bigger volume of unary weights
            # because of the way graphcut imposes smoothness cost this is the easiest to implement
            full_unary_loss_volume = torch.zeros(len(unary_loss_volume), n_hyps).to(device) + cost_upper_bound
            for step in range(nr_steps):
                # fill in these unary losses in the correct position in the full volume
                full_idces = ((depth_volume[:, step] - depth_min) / step_size).round().long()
                full_values = safe_unary_loss_volume[:, step, None]
                full_unary_loss_volume.scatter_(dim=1, index=full_idces, src=full_values)
            full_offsets = (depth_volume[:, 0, 0] - depth_min).div_(step_size).round_()

            import maxflow
            graph = maxflow.GraphFloat()
            node_ids = graph.add_grid_nodes((N_pixels, n_hyps + 1))
            for hyp in tqdm(range(1, n_hyps + 1),
                            desc="Building optimization graph - unary weights"):
                nodepairs = node_ids[:, hyp - 1:hyp + 1]
                edgeweights = to_numpy(full_unary_loss_volume[:, hyp - 1:hyp])
                graph.add_grid_edges(
                    nodepairs,
                    weights=edgeweights,
                    structure=np.array([[0, 0, 0], [0, 0, 1], [0, 0, 0]]),
                    symmetric=1)
            # build terminal edges
            for x in tqdm(range(N_pixels),
                          desc="Building optimization graph - terminal edges"):
                graph.add_tedge(node_ids[x, 0], cost_upper_bound, 0)
                graph.add_tedge(node_ids[x, n_hyps], 0, cost_upper_bound)

            # debug test: not including the smoothness loss *should* mean that we get exactly the unary winners
            # print("Starting unary maxflow calculation...")
            # tic = time.time()
            # unary_max_flow = graph.maxflow()
            # print("Finished in %ss" % (time.time() - tic))
            # unary_min_cut = graph.get_grid_segments(node_ids)
            # winners_unary_test = np.nonzero(unary_min_cut[:,1:] != unary_min_cut[:,:-1])[1]
            # assert np.all(winners_unary_test == to_numpy(winners_unary) + to_numpy(full_offsets)), \
            #     "Issue building the graph: unary solution does not match unary WTA"

            no_neighbour_node = graph.add_nodes(1)
            neighbours_g = to_numpy(neighbours)
            neighbours_g[neighbours_g < 0] = len(neighbours)
            node_ids_g = np.concatenate(
                (node_ids, np.ones((1, n_hyps + 1), dtype=node_ids.dtype) * no_neighbour_node),
                axis=0)
            for n_idx in range(4):
                neighbour_ids = np.take(node_ids_g[:, :-1], indices=neighbours_g[:, n_idx], axis=0)
                nodepairs = np.stack((node_ids[:, :-1], neighbour_ids), axis=2)
                edgeweights = lambda_s
                candidates = nodepairs[:, :, 1] != no_neighbour_node
                candidates = to_numpy(
                    (depth_volume[:, 0] - step_size * 3 <= depth_hypotheses[None]) *
                    (depth_volume[:, -1] + step_size * 3 >= depth_hypotheses[None])
                ) * candidates
                graph.add_grid_edges(
                    nodepairs[candidates].reshape(-1, 2),
                    weights=edgeweights,
                    structure=np.array([[0, 0, 0], [0, 0, 1], [0, 0, 0]]),
                    symmetric=0)

            print("Starting full maxflow calculation...")
            tic = time.time()
            max_flow = graph.maxflow()
            print("Finished in %ss" % (time.time() - tic))

            min_cut = graph.get_grid_segments(node_ids)
            nonzeroes = np.nonzero(min_cut[:, 1:] != min_cut[:, :-1])
            unique_nonzeroes = np.unique(nonzeroes[0], return_index=True)
            winners_graphcut = nonzeroes[1][unique_nonzeroes[1]]
            winners_graphcut = winners_graphcut - to_numpy(full_offsets)

            np.savez_compressed(
                graphcut_cache_file,
                winners_graphcut=winners_graphcut,
            )
        else:
            cached = np.load(graphcut_cache_file)
            winners_graphcut = cached['winners_graphcut']
        winners_graphcut = to_torch(winners_graphcut).long()

    # 4) Depth refinement step (outside no_grad: the refinement is gradient-based)
    def tangent_vectors(depth_image, invK, inv_extrinsics=None):
        """
        Given a HxWx1 depth map, return quick-and-dirty the HxWx3 LR and UP tangents

        Takes the weighted left-right and up-down neighbour points as spanning
        the local plane.
        """
        assert len(depth_image.shape) == 3, "Depth map should be H x W x 1"
        assert depth_image.shape[2] == 1, "Depth map should be H x W x 1"
        H = depth_image.shape[0]
        W = depth_image.shape[1]
        data_shape = list(depth_image.shape)
        world_coords = depth_map_to_locations(depth_image, invK, inv_extrinsics)
        depth1 = depth_image[:-2, 1:-1] * depth_image[1:-1, 1:-1] * depth_image[2:, 1:-1] * \
            depth_image[1:-1, :-2] * depth_image[1:-1, 2:]
        depth2 = depth_image[:-2, :-2] * depth_image[:-2, 2:] * \
            depth_image[2:, :-2] * depth_image[2:, 2:]
        depth_mask = (depth1 * depth2 == 0).float()
        ud_vectors = (world_coords[:-2, 1:-1, :] * 2 + world_coords[:-2, 0:-2, :] + world_coords[:-2, 2:, :]) \
            - (world_coords[2:, 1:-1, :] * 2 + world_coords[2:, 0:-2, :] + world_coords[2:, 2:, :])
        ud_vectors = ud_vectors / (depth_mask + ud_vectors.norm(dim=2, keepdim=True))
        lr_vectors = (world_coords[1:-1, :-2, :] * 2 + world_coords[0:-2, :-2, :] + world_coords[2:, :-2, :]) \
            - (world_coords[1:-1, 2:, :] * 2 + world_coords[0:-2, 2:, :] + world_coords[2:, 2:, :])
        lr_vectors = lr_vectors / (depth_mask + lr_vectors.norm(dim=2, keepdim=True))
        repad = lambda x: torch.nn.functional.pad(x, pad=(0, 0, 1, 1, 1, 1))
        return repad(lr_vectors), repad(ud_vectors), repad((depth_mask == 0).float())

    def get_laplacian(depth_image, mask):
        kernel = to_torch(np.array([
            [-0.25, -0.50, -0.25],
            [-0.50, 3.00, -0.50],
            [-0.25, -0.50, -0.25],
        ])).float()
        laplacian = torch.nn.functional.conv2d(
            depth_image[None, None, :, :, 0], kernel[None, None])[0, 0, :, :, None]
        laplacian_mask = torch.nn.functional.conv2d(
            mask[None, None, :, :, 0].float(),
            torch.ones_like(kernel)[None, None])[0, 0, :, :, None] == 9
        repad = lambda x: torch.nn.functional.pad(x, pad=(0, 0, 1, 1, 1, 1))
        return repad(laplacian * laplacian_mask.float())

    depth_estimate = torch.gather(depth_volume, dim=1, index=winners_graphcut[:, None, None])
    depth_estimate.requires_grad_(True)
    center_mask = mask.view(*mask.shape[:2], 1)
    with torch.no_grad():
        normal_estimate = torch.gather(
            normal_volume, dim=1, index=winners_graphcut[:, None, None].expand(-1, -1, 3))
        normal_image = torch.zeros(*center_mask.shape[:2], 3).to(device)
        normal_image.masked_scatter_(center_mask, normal_estimate)
        depth_estimate_initial = depth_estimate.clone()
        depth_image_initial = torch.zeros(*center_mask.shape[:2], 1).to(device)
        depth_image_initial.masked_scatter_(center_mask, depth_estimate_initial)
    pixel_locs = torch.cat((pixels.float(), torch.ones(1, pixels.shape[1]).to(device)), dim=0)

    loop = tqdm(range(1000), desc="Depth refinement")
    loss_evolution = []
    optimizer = torch.optim.Adam([depth_estimate], eps=1e-5, lr=0.0001, betas=[0.9, 0.99])
    for iteration in loop:
        # term 1: position error
        initial_points = experiment_state.locations.invK @ (
            pixel_locs * depth_estimate_initial.view(1, -1))
        current_points = experiment_state.locations.invK @ (
            pixel_locs * depth_estimate.view(1, -1))
        position_diff = (initial_points - current_points).abs()
        position_error = (position_diff**2).sum()
        # term 2: the normal error
        depth_image = torch.zeros(*center_mask.shape[:2], 1).to(device)
        depth_image.masked_scatter_(center_mask, depth_estimate)
        lr_img, ud_img, mask_img = tangent_vectors(
            depth_image, experiment_state.locations.invK, experiment_state.locations.invRt)
        lr = lr_img[center_mask.expand_as(lr_img)].view(-1, 1, 3)
        ud = ud_img[center_mask.expand_as(ud_img)].view(-1, 1, 3)
        mask = mask_img[center_mask].view(-1, 1)
        normal_error = (mask * (((lr * normal_estimate).sum(dim=2)**2) +
                                ((ud * normal_estimate).sum(dim=2)**2))).sum()
        # term 3: smoothness constraint
        laplacian = get_laplacian(depth_image, center_mask)
        laplacian_masked = laplacian  # * (laplacian.abs() < 5 * step_size).float()
        smoothness_constraint = laplacian_masked.abs().sum()
        # the backprop
        total_loss = 1e5 * position_error + 10 * normal_error + 3000 * smoothness_constraint
        loss_evolution.append(total_loss.item())
        loop.set_description("Depth refinement | loss %8.6f" % loss_evolution[-1])
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
    plt.clf()
    plt.plot(loss_evolution)
    plt.savefig(os.path.join(cache_path, "higo_refinement_loss.png"))
    plt.close()

    # now return a new experiment_state
    new_experiment_state = ExperimentState.copy(experiment_state)
    new_experiment_state.locations = DepthMapParametrization()
    new_experiment_state.locations.initialize(
        experiment_state.locations.create_image(depth_estimate[:, 0]).squeeze(),
        experiment_state.locations.mask,
        experiment_state.locations.invK,
        experiment_state.locations.invRt,
    )
    winner_normals = torch.gather(
        normal_volume, dim=1, index=winners_graphcut[:, None, None].expand(-1, -1, 3)).squeeze()
    winner_diffuse = torch.gather(
        diffuse_volume, dim=1, index=winners_graphcut[:, None, None].expand(-1, -1, 3)).squeeze()
    new_experiment_state.normals = experiment_state.normals.__class__(
        new_experiment_state.locations)
    new_experiment_state.normals.initialize(winner_normals)
    new_experiment_state.materials = experiment_state.materials.__class__(
        new_experiment_state.brdf)
    new_experiment_state.materials.initialize(
        winner_diffuse.shape[0], winner_diffuse, winner_diffuse.device)
    new_experiment_state.materials.brdf_parameters['specular']['albedo'].data.zero_()
    new_experiment_state.materials.brdf_parameters['specular']['roughness'].data.fill_(0.1)
    return new_experiment_state
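
# To make the graph construction in step 3 above concrete, here is a toy
# version of the same idea (my simplification, for illustration): one chain
# of nodes per pixel, the unary cost of label k sitting on the edge between
# chain nodes k and k+1, and large terminal capacities pinning the chain
# ends. The position where the min cut severs each chain is the chosen label.
import numpy as np
import maxflow

unaries = np.array([[3.0, 1.0, 2.0],   # pixel 0: label 1 is cheapest
                    [0.5, 2.0, 2.5]])  # pixel 1: label 0 is cheapest
n_pixels, n_labels = unaries.shape
upper = unaries.sum() + 1  # larger than any possible cut through the chains

g = maxflow.GraphFloat()
node_ids = g.add_grid_nodes((n_pixels, n_labels + 1))
for k in range(n_labels):
    for p in range(n_pixels):
        # Edge between consecutive chain nodes carries the unary cost of label k.
        g.add_edge(node_ids[p, k], node_ids[p, k + 1], unaries[p, k], unaries[p, k])
for p in range(n_pixels):
    g.add_tedge(node_ids[p, 0], upper, 0)         # chain start tied to the source
    g.add_tedge(node_ids[p, n_labels], 0, upper)  # chain end tied to the sink
g.maxflow()
cut = g.get_grid_segments(node_ids)
labels = np.nonzero(cut[:, 1:] != cut[:, :-1])[1]  # cut position per chain
print(labels)  # -> [1 0]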
def makeMove(graph, EnergyFunction, alphaLabel, assignment):
    """
    One alpha-expansion move.

    @type graph: Graph
    @type alphaLabel: int
    @type assignment: list
    """
    # new vertex numbers
    numberOfVertices = graph.numberOfVertices
    # introduce a new vertex alpha
    alpha = numberOfVertices
    numberOfVertices = numberOfVertices + 1
    # introduce a new vertex alpha bar
    alphaBar = numberOfVertices
    numberOfVertices = numberOfVertices + 1
    newGraph = maxflow.GraphFloat()
    newGraph.add_nodes(graph.numberOfVertices)
    for vertex in range(graph.numberOfVertices):
        for u in graph.adjacencyList[vertex].keys():
            if assignment[vertex] == assignment[u]:
                e = EnergyFunction(vertex, assignment[vertex], u, assignment[u])
                newGraph.add_edge(vertex, u, e, e)
            else:
                # Auxiliary node between differently-labeled neighbors.
                # PyMaxflow exposes add_nodes(); take the single new node id.
                newNode = newGraph.add_nodes(1)[0]
                e = EnergyFunction(vertex, assignment[vertex], u, alphaLabel)
                newGraph.add_edge(vertex, newNode, e, e)
                e = EnergyFunction(vertex, alphaLabel, u, assignment[u])
                newGraph.add_edge(newNode, u, e, e)
                e = EnergyFunction(vertex, assignment[vertex], u, assignment[u])
                newGraph.add_tedge(newNode, 0, e)
        if assignment[vertex] != alphaLabel:
            e = EnergyFunction(vertex, assignment[vertex])
        else:
            # keeping the current (alpha) label must never be cut away,
            # so the alpha-bar side gets a prohibitively large capacity
            e = 1000000000
        newGraph.add_tedge(vertex, EnergyFunction(vertex, alphaLabel), e)
    k = newGraph.maxflow()
    print(k)
    # The partition tells us whether a vertex is reachable from alpha or alpha
    # bar. Since the cut defines the assignment: if i is reachable from alpha,
    # then (i, alpha) is on the cut and vice versa. So if partition[i] ==
    # alphaBar, the new assignment for i is alpha.
    for i in range(graph.numberOfVertices):
        if newGraph.get_segment(i):
            assignment[i] = alphaLabel
    return assignment
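
# For reference, a toy EnergyFunction matching the two call shapes used above
# (unary: f(v, label); pairwise: f(v, lv, u, lu)). The observed array is
# hypothetical, purely to make the sketch self-contained:
import numpy as np

observed = np.array([0, 2, 3, 3])  # hypothetical per-vertex observations

def EnergyFunction(vertex, label, neighbor=None, neighborLabel=None):
    if neighbor is None:
        # Unary data term: penalize deviating from the observation.
        return abs(int(observed[vertex]) - label)
    # Pairwise Potts term: constant penalty for differing labels.
    return 0.0 if label == neighborLabel else 1.0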
def labeling(self, points, model, estimator, neighborhood_graph, lambda_, threshold):
    """
    Minimize the energy of the graph with maxflow and label the model inliers.

    Parameters
    ----------
    points : numpy
        The input data points
    model : Model
        The current model parameters
    estimator : Estimator
        The estimator used for fitting this model
    neighborhood_graph : GridNeighborGraph
        The neighborhood graph of the current data points
    lambda_ : float
        Weight of the spatial coherence energy term
    threshold : float
        Threshold separating inliers from outliers

    Returns
    ----------
    list
        Indices of the points labeled as model inliers
    """
    self.graph = maxflow.GraphFloat()  # start from an empty graph
    squared_truncated_threshold = threshold**2
    point_number = np.shape(points)[0]
    self.graph.add_nodes(point_number)
    # Compute the energy Ep of every point.
    squared_residuals = []
    for point_idx in range(point_number):
        squared_distance = estimator.squaredResidual(points[point_idx], model)
        c0 = squared_distance / squared_truncated_threshold
        c1 = 1.0 - c0
        squared_residuals.append(squared_distance)
        self.__addTerm1(point_idx, c1, 0)
    # The spatial coherence weight must be positive to act as a penalty.
    if lambda_ > 0:
        e00, e01, e10, e11 = 0.0, 1.0, 1.0, 0.0
        # Iterate over all points p.
        for point_idx in range(point_number):
            energy1 = max(0, 1.0 - squared_residuals[point_idx] / squared_truncated_threshold)
            # Iterate over the neighbors of p and compute the energies.
            neighbors = neighborhood_graph.getNeighbors(point_idx)
            for neighbor_idx in neighbors:
                if neighbor_idx == point_idx:
                    continue
                energy2 = 1.0 - squared_residuals[neighbor_idx] / squared_truncated_threshold
                e00 = 0.5 * (energy1 + energy2)
                e11 = 1.0 - e00
                self.__addTerm2(point_idx, neighbor_idx,
                                e00 * lambda_, lambda_, lambda_, e11 * lambda_)
    # Minimize the energy of graph G with the maxflow algorithm.
    self.graph.maxflow()
    # Collect the inlier indices.
    inliers = []
    for point_idx in range(point_number):
        # Segment 1 means the given point is close to the SINK.
        if self.graph.get_segment(point_idx) == 0:
            inliers.append(point_idx)
    return inliers
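
# __addTerm1/__addTerm2 are private helpers of that class and are not shown
# here. A minimal sketch of how they could map onto PyMaxflow primitives,
# following the standard Kolmogorov-Zabih energy construction (my assumption,
# not the original implementation); they would live inside the same class:
def __addTerm1(self, i, e0, e1):
    # Unary term: cost e0 if node i gets label 0 (source side), e1 for label 1.
    # add_tedge(i, cap_source, cap_sink): the sink capacity is paid for label 0.
    self.graph.add_tedge(i, e1, e0)

def __addTerm2(self, i, j, e00, e01, e10, e11):
    # Pairwise term E(i, j); assumes submodularity: e00 + e11 <= e01 + e10,
    # so both residual capacities below are nonnegative.
    self.graph.add_tedge(i, e11, e00)
    self.graph.add_edge(i, j, e01 - e00, e10 - e11)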