def calcbounds(self, bestupper=np.inf, force=False):
    """Compute lower/upper distance bounds for this displacement.

    ``self.pos2`` is shifted by ``self.disp``; for every index group in
    ``self.perm`` the pairwise displacements to ``self.pos1`` are wrapped
    by ``self.boxvec`` and turned into squared-distance matrices.

    Parameters
    ----------
    bestupper : float
        Best upper bound found so far.  The assignment problems are only
        solved when the cheap estimate is below this value.
    force : bool
        When true, always solve the assignment problems.

    Returns
    -------
    (lowerbound, upperbound, upperperm)
        ``upperperm`` is a list with one assignment per group in
        ``self.perm`` (``None`` on the quick path).

    Side effects: sets ``self.quick``, ``self.lowerbound`` and
    ``self.upperbound``.
    """
    tpos2 = self.pos2 - np.atleast_2d(self.disp)
    # One (len(p), len(p), ndim) displacement block per permutable group.
    disps = [self.pos1[p[:, None], :] - tpos2[p[None, :], :]
             for p in self.perm]
    for d in disps:
        # Subtract whole multiples of the box vector (in place).
        d -= np.round(d / self.boxvec) * self.boxvec
    # np.norm does not exist; the norm lives in np.linalg.
    dists = [np.linalg.norm(d, axis=2) for d in disps]
    # Distances reduced by 0.5 * width * sqrt(3) and clipped at zero.
    # NOTE(review): presumably the half-diagonal of a cube of side
    # self.width -- confirm against the caller.
    ldists = [np.clip(d - 0.5 * self.width * sqrt(3), 0, np.inf)**2
              for d in dists]
    dists = [d**2 for d in dists]

    if not force:
        # Cheap estimate: every column takes its smallest entry.
        upperbound = sqrt(sum(d.min(0).sum() for d in dists))
    else:
        upperbound = 0.

    if upperbound < bestupper or force:
        self.quick = False
        # lap() gets a copy in case it modifies its argument in place;
        # the matrices are indexed again below.
        lowerperm = [lap(ld.copy())[0] for ld in ldists]
        self.lowerbound = sqrt(sum(
            ld[np.arange(len(lp)), lp].sum()
            for ld, lp in zip(ldists, lowerperm)))
        if self.lowerbound < bestupper:
            upperperm = [lap(d.copy())[0] for d in dists]
            self.upperbound = sqrt(sum(
                d[np.arange(len(up)), up].sum()
                for d, up in zip(dists, upperperm)))
        else:
            upperperm = lowerperm
            self.upperbound = np.inf
    else:
        self.quick = True
        self.lowerbound = sqrt(sum(d.min(0).sum() for d in ldists))
        # Previously left unset on this path, so the return statement
        # read a stale (or missing) attribute.
        self.upperbound = upperbound
        upperperm = None
    return self.lowerbound, self.upperbound, upperperm
def calcbounds(self, bestupper=np.inf, force=False):
    """Compute lower/upper distance bounds for the rotation ``self.rot``.

    ``self.pos2`` is rotated by ``angle_axis2mat(self.rot)``.  A
    nearest-neighbour query against ``self.pos1tree`` gives a cheap
    estimate; only if that estimate is below ``bestupper`` (or ``force``
    is set) are the assignment problems solved with ``lap``.

    Returns ``(lowerbound, upperbound, upperperm)`` and stores the first
    two, together with ``self.quick`` and ``self.rpos2``, on the instance.
    """
    rpos2 = self.pos2.dot(angle_axis2mat(self.rot))
    if not force:
        # d: nearest-neighbour distances, p: indices returned by the query
        d, p = self.pos1tree.query(rpos2)
        upperbound = norm(d)
    else:
        upperbound = 0
    if upperbound < bestupper or force:
        self.quick = False
        # Full matrix of squared distances between pos1 and rotated pos2.
        dists = ((self.pos1[:, None, :] - rpos2[None, :, :])**2).sum(2)
        # Same form as the quick branch below, where the expression is
        # (r1**2 + r2**2 - d**2) / (2 * r1 * r2).
        # NOTE(review): presumably r1pr2 = r1**2 + r2**2 and
        # r1tr2 = r1 * r2 -- confirm where they are set.
        cosa = 0.5 * (self.r1pr2 - dists) / self.r1tr2
        sina = sqrt(np.clip(1. - cosa**2, 0., 1.))
        # Angular slack derived from the cell width, capped at pi.
        theta = min((sqrt(3) * self.width / 2), np.pi)
        cosd = cos(theta)
        sind = abs(sin(theta))
        # cos(a - theta), replaced by 1 where cosa > cosd.
        cosdm = np.where(cosa > cosd, 1., cosa * cosd + sina * sind)
        lowerbounddists = np.clip(self.r1pr2 - 2 * self.r1tr2 * cosdm, 0., np.inf)
        # lap() gets a copy because lowerbounddists is indexed afterwards.
        lowerperm = lap(lowerbounddists.copy())[0]
        self.lowerbound = sqrt(
            sum(lowerbounddists[i, j] for i, j in enumerate(lowerperm)))
        if self.lowerbound < bestupper:
            upperperm = lap(dists)[0]
            self.upperbound = norm(self.pos1 - rpos2[upperperm])
        else:
            # Cannot beat bestupper: skip the second assignment problem.
            upperperm = lowerperm
            self.upperbound = np.inf
    else:
        self.quick = True
        # Same bound as above, evaluated only for the nearest-neighbour
        # pairs (p, d) from the tree query.
        cosa = 0.5 * (self.r1[p]**2 + self.r2**2 - d**2) / self.r1[p] / self.r2
        sina = sqrt(np.clip(1. - cosa**2, 0., 1.))
        theta = min((sqrt(3) * self.width / 2), np.pi)
        cosd = cos(theta)
        sind = abs(sin(theta))
        cosdm = np.where(cosa > cosd, 1., cosa * cosd + sina * sind)
        lowerbounddists = np.clip(
            self.r1[p]**2 + self.r2**2 - 2 * self.r1[p] * self.r2 * cosdm,
            0., np.inf)
        lowerbound = lowerbounddists.sum()**0.5
        self.upperbound = upperbound
        self.lowerbound = lowerbound
        upperperm = p
    # Keep the rotated coordinates on the instance.
    self.rpos2 = rpos2
    return self.lowerbound, self.upperbound, upperperm
def linear_assignment(matrix):
    """Solve the assignment problem for ``matrix``.

    The solver runs on a copy of ``matrix``.  Returns a list of
    ``[row, column]`` pairs, one per row, in row order.
    """
    row_to_col = hungarian.lap(copy(matrix))[0]
    return [[row, row_to_col[row]] for row in range(len(row_to_col))]
def weights_to_matching(p, index):
    """Build a maximum-weight matching from per-pair scores.

    ``index[k]`` is a ``(key_a, key_b)`` pair and ``p[k][1]`` its weight.
    Returns a dict mapping each matched ``(key_a, key_b)`` to the weight
    stored for that pair (0 when the pair never appeared in ``index``).
    """
    # Give every distinct key a dense row / column number, in order of
    # first appearance, and remember the reverse mapping.
    row_of = {}
    row_keys = []
    col_of = {}
    col_keys = []
    for key_a, key_b in index:
        if key_a not in row_of:
            row_of[key_a] = len(row_of)
            row_keys.append(key_a)
        if key_b not in col_of:
            col_of[key_b] = len(col_of)
            col_keys.append(key_b)

    weights = np.zeros((len(row_of), len(col_of)))
    for k, scores in enumerate(p):
        key_a, key_b = index[k]
        weights[row_of[key_a]][col_of[key_b]] = scores[1]

    # lap() is given the negated weights, so it maximises total weight.
    matching = hungarian.lap(-weights)[0]

    res = {}
    for row, col in enumerate(matching):
        res[(row_keys[row], col_keys[col])] = weights[row][col]
    return res
def BGM_evl(matrix, src_indexes, dst_indexes, gt, src_strip_indexes,
            dst_strip_indexes):
    """Evaluate a bipartite matching against ground truth and print recall.

    ``matrix`` is the cost matrix handed to ``hungarian.lap``.  For every
    row/column pair of the resulting assignment, the row is resolved via
    ``src_indexes``/``src_strip_indexes`` and the column via
    ``dst_indexes``/``dst_strip_indexes`` using ``obtain_base``.  A pair
    found in ``gt`` counts as a hit; other resolvable pairs are collected
    as misses and passed to ``st``.

    Prints the matching score and the recall (``hits / len(gt)``).
    Returns nothing.  Raises ZeroDivisionError when ``gt`` is empty.
    """
    mapping = hungarian.lap(matrix)
    distance = caldistance(mapping, matrix)
    hit = 0
    print("Binary Similarity Socre is = " + str(distance))
    miss = []
    for index, i in enumerate(mapping[0]):
        try:
            src = obtain_base(src_indexes[index], src_strip_indexes)
            dst = obtain_base(dst_indexes[i], dst_strip_indexes)
        except Exception:
            # Best effort: pairs that cannot be resolved (for example
            # padding rows or columns) are skipped.
            continue
        if (src, dst) in gt:
            hit += 1
        else:
            miss.append((src, dst))
    print("s")
    # Called as before; its result was bound to an unused local.
    st(gt, miss)
    print("The BGM matching recall rate =" + str(hit * 1.0 / len(gt)))
def calc_match(d):
    """Accumulate sampled assignment costs for every matrix in ``d``.

    For ``sample_rounds`` rounds and every matrix ``d[i]``,
    ``match_points`` random row indices and ``match_points`` random column
    indices are drawn (with replacement).  The sampled sub-matrix is
    solved with ``hungarian.lap`` and the cost of that assignment is added
    to ``ret[i]``.

    Returns a 1-D array with one accumulated cost per entry of ``d``.
    """
    ret = np.zeros(len(d))
    for _round in range(sample_rounds):
        for i in range(len(d)):
            choices_a = [
                random.randint(0, d[i].shape[0] - 1)
                for _ in range(match_points)
            ]
            choices_b = [
                random.randint(0, d[i].shape[1] - 1)
                for _ in range(match_points)
            ]
            mat = d[i][choices_a]
            mat = (mat.T)[choices_b]
            # lap() works on a separate array; ``mat`` is read below.
            am = np.array(mat)
            match = hungarian.lap(am)[0]
            x = 0.0
            for p in range(len(match)):
                # Previously indexed with the outer ``i``, which added the
                # same entry len(match) times instead of summing the
                # assignment.
                x += mat[p][match[p]]
            ret[i] += x
    return ret
def get_best_score(products, customers):
    """Return the best total ``SS`` score of a product/customer pairing.

    A square reward matrix is filled with ``SS(product, customer)`` and
    zero-padded to the larger of the two counts.  The reward is turned
    into a cost by subtracting it from its largest entry, and the minimum
    cost assignment is solved with ``hungarian.lap``.
    """
    maxDim = max(len(customers), len(products))
    rewardMatrix = np.zeros([maxDim, maxDim])
    for i, product in enumerate(products):
        for j, customer in enumerate(customers):
            rewardMatrix[i, j] = SS(product, customer)

    highest = max(max(row) for row in rewardMatrix)
    costMatrix = highest - rewardMatrix
    choices = hungarian.lap(costMatrix)[0]

    # Total reward of the chosen (row, column) cells.
    return sum([rewardMatrix[row, col] for row, col in enumerate(choices)])
def run_hungarian(matrix, df_population, df_sample_condition):
    """Run the hungarian linear assignment solver and return the matches.

    Takes a matrix of data values plus the ``df_population`` and
    ``df_sample_condition`` dataframes.  The first
    ``len(df_sample_condition)`` entries of the column assignment select
    rows of ``df_population``.

    Returns the matches as a new dataframe with the same structure as
    ``df_population``.
    """
    row_assigns, col_assigns = hungarian.lap(matrix)
    matched_positions = [
        col_assigns[i] for i in range(len(df_sample_condition))
    ]
    # The assignment yields matrix positions, so select positionally.
    # ``DataFrame.ix`` is gone from current pandas and was label-based on
    # integer indexes.
    return df_population.iloc[matched_positions]
def _item_or_none(items, position):
    """Return ``items[position]``, or None when it is past the end."""
    try:
        return items[position]
    except IndexError:
        return None


def hungarian_pairing(turkeys, customers):
    """Use the Hungarian algorithm to quickly pair turkeys and customers.

    A square cost matrix (rows: customers, columns: turkeys) is padded to
    the larger of the two counts:

    * spare turkey (no customer): ``-turkey.leftover_value()``
    * customer without turkey: 0 (bought and sold with no profit)
    * real pair: ``-customer.agreed_price(turkey)``

    Returns ``(assign_t_to_c, assign_c_to_t)``: two dicts mapping each
    turkey to its customer (or None) and each customer to its turkey
    (or None).

    Side effect: sets numpy's print threshold so arrays print in full.
    """
    import sys

    size = max(len(turkeys), len(customers))
    matrix = np.matrix([[0.0] * size] * size)
    for t in range(size):
        turkey = _item_or_none(turkeys, t)
        for c in range(size):
            customer = _item_or_none(customers, c)
            # calculate cost for this pairing
            if customer is None:
                # spare turkey, still has some value
                cost = -turkey.leftover_value()
            elif turkey is None:
                # no turkey for this customer, must buy him one
                # assume turkey is bought and sold with no profit made
                cost = 0
            else:
                cost = -customer.agreed_price(turkey)
            matrix[c, t] = cost

    # threshold=np.nan is rejected by current numpy; sys.maxsize keeps
    # the "never summarise" intent.
    np.set_printoptions(threshold=sys.maxsize)

    # Rows are customers and columns are turkeys, so the second result
    # (column -> row) gives the customer index for every turkey index.
    customer_to_turkey, turkey_to_customer = hungarian.lap(matrix)

    assign_t_to_c = {}
    assign_c_to_t = {}
    for t, c in enumerate(turkey_to_customer):
        customer = _item_or_none(customers, c)
        turkey = _item_or_none(turkeys, t)
        if turkey is not None:
            assign_t_to_c[turkey] = customer
        if customer is not None:
            assign_c_to_t[customer] = turkey
    return assign_t_to_c, assign_c_to_t
def _optimize_permutations_hungarian(X1, X2, make_cost_matrix):
    """Find the best permutation of the atoms in X2 relative to X1.

    Both coordinate arrays are reshaped to ``(-1, 3)`` before the cost
    matrix is built with ``make_cost_matrix(X1, X2)``.

    The assignment is solved with the compiled ``hungarian`` package
    (http://pypi.python.org/pypi/hungarian/).  The Hungarian algorithm
    scales as O(n^3), much faster than the O(n!) of looping through all
    permutations (http://en.wikipedia.org/wiki/Hungarian_algorithm).
    A pure-Python alternative is ``munkres``
    (http://pypi.python.org/pypi/munkres/1.0.5.2).

    Historical note from the original author: the stock ``hungarian``
    package segfaulted because an integer pointer was cast as
    ``(npy_intp *)``.

    Returns the second array returned by ``hungarian.lap``.
    """
    # cost[j, i] = (X1(i, :) - X2(j, :))**2
    cost = make_cost_matrix(X1.reshape(-1, 3), X2.reshape(-1, 3))

    # note: the hungarian algorithm changes the cost matrix, but the
    # indices it returns are still correct
    assignments = hungarian.lap(cost)

    # TODO: how to get new distance?
    return assignments[1]
def getWassersteinDist(S, T):
    """
    Perform the Wasserstein distance matching between persistence diagrams.
    Assumes first two columns of S and T are the coordinates of the
    persistence points, but allows for other coordinate columns (which are
    ignored in diagonal matching).

    :param S: array with >= 2 columns of birth/death pairs for diagram 1
    :param T: array with >= 2 columns of birth/death pairs for diagram 2
    :returns: (list of (row, column) matched indices, total cost,
        cross-similarity matrix as a nested list of size
        len(S) + len(T) squared)
    """
    import hungarian  # Requires having compiled the library

    n_s, n_t = S.shape[0], T.shape[0]
    # An empty diagram is replaced by a single point at the origin.
    if n_s == 0:
        S = np.array([[0, 0]])
        n_s = 1
    if n_t == 0:
        T = np.array([[0, 0]])
        n_t = 1

    # Step 1: cross-similarity between S and T, including the diagonal.
    point_dists = sklearn.metrics.pairwise.pairwise_distances(S, T)

    # Rotate the diagrams by 45 degrees so that the second coordinate is
    # the straight-line distance to the diagonal.
    c = np.cos(np.pi / 4)
    s = np.sin(np.pi / 4)
    rot = np.array([[c, -s], [s, c]])
    S = S[:, 0:2].dot(rot)
    T = T[:, 0:2].dot(rot)

    total = n_s + n_t
    D = np.zeros((total, total))
    D[:n_s, :n_t] = point_dists
    # Point-to-diagonal blocks: off-diagonal entries take the current
    # maximum of D, the diagonal holds each point's distance to the
    # diagonal.  The lower-right block stays zero.
    s_block = np.max(D) * np.ones((n_s, n_s))
    np.fill_diagonal(s_block, S[:, 1])
    D[:n_s, n_t:] = s_block
    t_block = np.max(D) * np.ones((n_t, n_t))
    np.fill_diagonal(t_block, T[:, 1])
    D[n_s:, :n_t] = t_block
    D = D.tolist()

    # Step 2: run the hungarian algorithm and add up the matched costs.
    row_to_col = hungarian.lap(D)[0]
    matchidx = list(enumerate(row_to_col))
    matchdist = 0
    for i, j in matchidx:
        matchdist += D[i][j]
    return (matchidx, matchdist, D)
def getWassersteinDist(S, T):
    """
    Perform the Wasserstein distance matching between persistence diagrams.
    Assumes first two columns of S and T are the coordinates of the
    persistence points, but allows for other coordinate columns (which are
    ignored in diagonal matching).

    :param S: array with >= 2 columns of birth/death pairs for diagram 1
    :param T: array with >= 2 columns of birth/death pairs for diagram 2
    :returns: (list of (row, column) matched indices, total cost,
        cross-similarity matrix as a nested list of size
        len(S) + len(T) squared)
    """
    import hungarian  # Requires having compiled the library

    n_s, n_t = S.shape[0], T.shape[0]
    # An empty diagram is replaced by a single point at the origin.
    if n_s == 0:
        S = np.array([[0, 0]])
        n_s = 1
    if n_t == 0:
        T = np.array([[0, 0]])
        n_t = 1

    # Step 1: cross-similarity between S and T, including the diagonal.
    point_dists = sklearn.metrics.pairwise.pairwise_distances(S, T)

    # Rotate the diagrams by 45 degrees so that the second coordinate is
    # the straight-line distance to the diagonal.
    c = np.cos(np.pi/4)
    s = np.sin(np.pi/4)
    rot = np.array([[c, -s], [s, c]])
    S = S[:, 0:2].dot(rot)
    T = T[:, 0:2].dot(rot)

    total = n_s + n_t
    D = np.zeros((total, total))
    D[:n_s, :n_t] = point_dists
    # Point-to-diagonal blocks: off-diagonal entries take the current
    # maximum of D, the diagonal holds each point's distance to the
    # diagonal.  The lower-right block stays zero.
    s_block = np.max(D)*np.ones((n_s, n_s))
    np.fill_diagonal(s_block, S[:, 1])
    D[:n_s, n_t:] = s_block
    t_block = np.max(D)*np.ones((n_t, n_t))
    np.fill_diagonal(t_block, T[:, 1])
    D[n_s:, :n_t] = t_block
    D = D.tolist()

    # Step 2: run the hungarian algorithm and add up the matched costs.
    row_to_col = hungarian.lap(D)[0]
    matchidx = list(enumerate(row_to_col))
    matchdist = 0
    for i, j in matchidx:
        matchdist += D[i][j]
    return (matchidx, matchdist, D)
def getWassersteinDist(S, T):
    """
    Match two persistence diagrams and return the matching cost.

    Assumes first two columns of S and T are the coordinates of the
    persistence points, but allows for other coordinate columns (which are
    ignored in diagonal matching).

    Returns (list of (row, column) matched indices, total cost,
    cross-similarity matrix as a nested list).
    """
    import hungarian  # Requires having compiled the library

    n_s, n_t = S.shape[0], T.shape[0]
    # An empty diagram is replaced by a single point at the origin.
    if n_s == 0:
        S = np.array([[0, 0]])
        n_s = 1
    if n_t == 0:
        T = np.array([[0, 0]])
        n_t = 1

    # Pairwise Euclidean distances via |s|^2 + |t|^2 - 2 s.t; small
    # negative values from round-off are clamped before the square root.
    s_sqr = np.sum(S**2, 1)
    t_sqr = np.sum(T**2, 1)
    point_dists = s_sqr[:, None] + t_sqr[None, :] - 2 * S.dot(T.T)
    point_dists[point_dists < 0] = 0
    point_dists = np.sqrt(point_dists)

    # Rotate the diagrams by 45 degrees so that the second coordinate is
    # the straight-line distance to the diagonal.
    c = np.cos(np.pi / 4)
    s = np.sin(np.pi / 4)
    rot = np.array([[c, -s], [s, c]])
    S = S[:, 0:2].dot(rot)
    T = T[:, 0:2].dot(rot)

    total = n_s + n_t
    D = np.zeros((total, total))
    D[:n_s, :n_t] = point_dists
    # Point-to-diagonal blocks: off-diagonal entries take the current
    # maximum of D, the diagonal holds each point's distance to the
    # diagonal.  The lower-right block stays zero.
    s_block = np.max(D) * np.ones((n_s, n_s))
    np.fill_diagonal(s_block, S[:, 1])
    D[:n_s, n_t:] = s_block
    t_block = np.max(D) * np.ones((n_t, n_t))
    np.fill_diagonal(t_block, T[:, 1])
    D[n_s:, :n_t] = t_block
    D = D.tolist()

    # Run the hungarian algorithm and add up the matched costs.
    row_to_col = hungarian.lap(D)[0]
    matchidx = list(enumerate(row_to_col))
    matchdist = 0
    for i, j in matchidx:
        matchdist += D[i][j]
    return (matchidx, matchdist, D)
def _optimize_permutations_hungarian(X1, X2, make_cost_matrix):
    """Find the best permutation of the atoms in X2 relative to X1.

    Both coordinate arrays are reshaped to ``(-1, 3)`` before the cost
    matrix is built with ``make_cost_matrix(X1, X2)``.

    The assignment is solved with the compiled ``hungarian`` package
    (http://pypi.python.org/pypi/hungarian/).  The Hungarian algorithm
    scales as O(n^3), much faster than the O(n!) of looping through all
    permutations (http://en.wikipedia.org/wiki/Hungarian_algorithm).
    A pure-Python alternative is ``munkres``
    (http://pypi.python.org/pypi/munkres/1.0.5.2).

    Historical note from the original author: the stock ``hungarian``
    package segfaulted because an integer pointer was cast as
    ``(npy_intp *)``.

    Returns the second array returned by ``hungarian.lap``.
    """
    points1 = X1.reshape(-1, 3)
    points2 = X2.reshape(-1, 3)

    # cost[j, i] = (X1(i, :) - X2(j, :))**2
    cost = make_cost_matrix(points1, points2)

    # note: the hungarian algorithm changes the cost matrix, but the
    # indices it returns are still correct
    perm = hungarian.lap(cost)[1]

    # TODO: how to get new distance?
    return perm
def find_permutations_hungarian(X1, X2, make_cost_matrix=_make_cost_matrix):
    """Solve the assignment problem for the cost matrix of X1 and X2.

    Returns ``(dist, perm)`` where ``perm`` is the second array returned
    by ``hungarian.lap``.  ``dist`` is always the placeholder ``-1``: the
    distance after permutation is not computed here.
    """
    perm = hungarian.lap(make_cost_matrix(X1, X2))[1]
    # TODO: how to get new distance?
    return -1, perm
def compute_matching_assignment(sA, dA, sB, dB):
    """Match the keys in ``sA`` with those in ``sB`` by ``diffsize`` cost.

    The (len(sA) + len(sB)) square ``uint32`` cost matrix has three
    filled regions; the remaining lower-right block stays zero:

    * top-left: ``diffsize(dA[u], dB[v])`` for each real pair
    * top-right: ``options.creation_fudge * diffsize(dA[u], None)``
    * bottom-left: ``options.creation_fudge * diffsize(None, dB[v])``

    Returns the two assignment arrays produced by ``hungarian.lap``.
    """
    n_a = len(sA)
    n_b = len(sB)
    total = n_a + n_b
    dist = np.zeros((total, total), dtype=np.uint32)

    # Real A-to-B pairs.
    for row, key_a in enumerate(sA):
        for col, key_b in enumerate(sB):
            dist[row, col] = diffsize(dA[key_a], dB[key_b])

    # An A entry matched to nothing.
    for row, key_a in enumerate(sA):
        for col in range(n_b, total):
            dist[row, col] = options.creation_fudge * diffsize(dA[key_a], None)

    # A B entry matched to nothing.
    for row in range(n_a, total):
        for col, key_b in enumerate(sB):
            dist[row, col] = options.creation_fudge * diffsize(None, dB[key_b])

    lhs, rhs = hungarian.lap(dist)
    return lhs, rhs
def LAP(relays, locs): """ return values """ robot_to_loc = {} loc_to_robot={} """ we need to make arrays and then retrieve the key values """ locix_to_key = {} relayix_to_key={} relay_l = [] loc_l = [] if len(relays) < len(locs): print "WTF? locs > relays", len(locs), len(relays) return loc_to_robot, robot_to_loc n=max(len(relays),len(locs)) a = numpy.zeros(shape=(n,n)) i=0 for (rix, (rpos_x, rpos_y)) in relays.items(): relayix_to_key[i] = rix j=0 for (lix, (loc_x,loc_y)) in locs.items(): if i==0: locix_to_key[j]=lix d = dist( (rpos_x,rpos_y), (loc_x,loc_y)) a[i][j]=d j+=1 """ fill remaining with 0 """ while j<n: a[i][j] = 0 j+=1 i+=1 print "doing LAP nrelays ",len(relays),"nlocs",len(locs) print a [col, row] = hungarian.lap(a) print "relay ",relays print "locs ",locs print "col ",col print "row ",row for i in range(n): rix=relayix_to_key[i] if col[i] >= len(locs): print "warning: THIS SHOULD NOT HAPPEN" continue locix=locix_to_key[col[i]] robot_to_loc[rix]=locix loc_to_robot[locix] = rix return loc_to_robot,robot_to_loc
def LAP(relays, locs): """ return values """ robot_to_loc = {} loc_to_robot = {} """ we need to make arrays and then retrieve the key values """ locix_to_key = {} relayix_to_key = {} relay_l = [] loc_l = [] if len(relays) < len(locs): print "WTF? locs > relays", len(locs), len(relays) return loc_to_robot, robot_to_loc n = max(len(relays), len(locs)) a = numpy.zeros(shape=(n, n)) i = 0 for (rix, (rpos_x, rpos_y)) in relays.items(): relayix_to_key[i] = rix j = 0 for (lix, (loc_x, loc_y)) in locs.items(): if i == 0: locix_to_key[j] = lix d = dist((rpos_x, rpos_y), (loc_x, loc_y)) a[i][j] = d j += 1 """ fill remaining with 0 """ while j < n: a[i][j] = 0 j += 1 i += 1 print "doing LAP nrelays ", len(relays), "nlocs", len(locs) print a [col, row] = hungarian.lap(a) print "relay ", relays print "locs ", locs print "col ", col print "row ", row for i in range(n): rix = relayix_to_key[i] if col[i] >= len(locs): print "warning: THIS SHOULD NOT HAPPEN" continue locix = locix_to_key[col[i]] robot_to_loc[rix] = locix loc_to_robot[locix] = rix return loc_to_robot, robot_to_loc
def cal_diff_seq(self, src_seqlist, dst_seqlist):
    """Return the matching cost between two lists of sequences.

    Only the first ``min(len(src_seqlist), len(dst_seqlist))`` entries of
    each list are compared.  Pairwise costs are normalised Levenshtein
    distances; the assignment from ``hungarian.lap`` is scored by
    ``self.cal_mapping_cost``.
    """
    matrix_len = min(len(src_seqlist), len(dst_seqlist))
    matrix = [
        [distance.nlevenshtein(src_seqlist[src_id], dst_seqlist[dst_id])
         for dst_id in range(matrix_len)]
        for src_id in range(matrix_len)
    ]
    row_to_col = hungarian.lap(matrix)[0]
    return self.cal_mapping_cost(row_to_col, matrix, src_seqlist,
                                 dst_seqlist)
def cal_diff(self, src_constraint_set, dst_constraint_set, bodylist,
             row_name, colum_name, c_depth):
    """Return the per-depth matching cost between two constraint sets.

    For every depth ``i`` in ``0..c_depth`` the names in
    ``src_constraint_set[i]`` and ``dst_constraint_set[i]`` (empty when
    the depth is missing) form a square cost matrix, padded to the larger
    count.  A cell holds the Jaccard distance between the two bodies
    looked up in ``bodylist``; cells that cannot be evaluated (padding,
    missing bodies, failing distance) cost 1.

    Returns a dict mapping each depth to the value of
    ``self.cal_mapping_cost`` for the assignment found by
    ``hungarian.lap``, or 0 when both sets are empty.
    """
    difflist = {}
    for i in range(c_depth + 1):
        src_set = src_constraint_set[i] if i in src_constraint_set else []
        dst_set = dst_constraint_set[i] if i in dst_constraint_set else []
        matrix_len = max(len(src_set), len(dst_set))

        matrix = []
        for src_id in range(matrix_len):
            row = []
            for dst_id in range(matrix_len):
                try:
                    dst_name = dst_set[dst_id]
                    src_name = src_set[src_id]
                    src_body = bodylist[
                        self.version['src']][row_name][src_name]
                    dst_body = bodylist[
                        self.version['dst']][colum_name][dst_name]
                    cost = distance.jaccard(src_body, dst_body)
                except Exception:
                    # Deliberate best effort, but no longer swallows
                    # KeyboardInterrupt / SystemExit.
                    cost = 1
                row.append(cost)
            matrix.append(row)

        if len(matrix) != 0:
            mapping = hungarian.lap(matrix)
            cost = self.cal_mapping_cost(mapping[0], matrix, src_set,
                                         dst_set)
        else:
            cost = 0
        difflist[i] = cost
    return difflist
def process(self, matrixOrig):
    """Keep only the assigned cell of each row of ``matrixOrig``.

    The matrix is made square with ``squarify(matrixOrig, 1.)`` and solved
    with ``hungarian.lap``.  The result is a ``float32`` array of shape
    ``(min dimension, max dimension)`` filled with 1.0, in which, for each
    of the first ``min dimension`` rows, the assigned cell carries the
    value from the squared matrix.

    The row-to-column assignment is stored in ``self._context["pairs"]``.
    """
    n_rows = len(matrixOrig)
    n_cols = len(matrixOrig[0])
    minDim = min(n_rows, n_cols)
    maxDim = max(n_rows, n_cols)

    matrix = squarify(matrixOrig, 1.)
    a = numpy.array(matrix, "float32")
    pairs = hungarian.lap(a)[0]

    a_void = numpy.ones((minDim, maxDim), dtype=a.dtype)
    for i, j in zip(range(minDim), pairs):
        a_void[i][j] = matrix[i][j]

    self._context["pairs"] = pairs
    return a_void
def process(self, matrixOrig):
    """Keep only the assigned cell of each row of ``matrixOrig``.

    The matrix is made square with ``tds.squarify(matrixOrig, 1.)`` and
    solved with ``hungarian.lap``.  A ``LinedMatrix`` built from the
    larger and smaller of the original dimensions with fill value 1.
    receives, for each of the first ``min dimension`` rows, the value of
    the assigned cell, written at flat offset
    ``column + row * lA_void._width``.

    The row-to-column assignment is stored in ``self._context["pairs"]``.
    """
    # lDim[0] is the smaller and lDim[1] the larger original dimension.
    lDim = sorted([matrixOrig._height, matrixOrig._width])
    matrixOrig = tds.squarify(matrixOrig, 1.)
    matrix = matrixOrig.getMatrix()
    # The unused negated copy of the matrix was dropped here.
    pairs = hungarian.lap(matrix)[0]

    lA_void = LinedMatrix(lDim[1], lDim[0], 1.)
    for i in range(lDim[0]):
        ni = pairs[i] + i * lA_void._width
        lA_void.data[ni] = matrix[i][pairs[i]]

    self._context["pairs"] = pairs
    return lA_void
def minCostMatching(weights, _flip=False):
    """Compute a minimum-cost assignment (weights are costs by default).

    *_flip: if True, convert the cost matrix *weights* to an equivalent
            profit matrix before solving.

    Uses the compiled ``hungarian`` library when it can be imported.
    Otherwise a message is printed and the result of
    ``maxProfitMatching`` is returned, on ``flip(weights)`` unless
    ``_flip`` is set.

    With the library available, returns ``(Mu, Mv, total)``: two dicts
    built from the two assignment arrays, and ``evalMatch(Mu, weights)``
    evaluated on the original weight matrix.
    """
    try:
        import hungarian
    except ImportError:
        # Only a failed import triggers the fallback; other errors
        # propagate.
        hungarian = None

    if hungarian is None:
        import os
        msg = "[minCostMatching] hungarian.so not found in %s?" % \
            os.getcwd()
        print(msg)
        if not _flip:
            # W represents cost by default: convert to profit
            return maxProfitMatching(flip(weights))
        # W would represent profit had we wanted to convert it
        return maxProfitMatching(np.array(weights))

    # library hungarian is available, use it
    if _flip:
        _weights = flip(weights)
    else:
        _weights = np.array(weights)
    match1, match2 = hungarian.lap(_weights)

    Mu = dict(enumerate(match1))
    Mv = dict((j, i) for i, j in enumerate(match2))
    # evaluate total cost (or profit) using the original weight matrix
    return (Mu, Mv, evalMatch(Mu, weights))
def fit_best_matching(prob, pairs_of_ids, filtered_pairs_of_ids, X):
    """Return a maximum-weight matching between locu and four ids.

    ``build_indices(pairs_of_ids)`` supplies the id <-> matrix-index
    maps.  ``prob[k][1]`` is the weight of ``filtered_pairs_of_ids[k]``;
    pairs that are not listed keep weight 0.  ``X`` is not used.

    Returns a dict mapping each matched ``(locu_id, four_id)`` to its
    weight.
    """
    (locu_id_to_index, locu_index_to_id,
     four_id_to_index, four_index_to_id) = build_indices(pairs_of_ids)

    matching_weights = np.zeros(
        (len(locu_id_to_index), len(four_id_to_index)))
    for k, (locu_id, four_id) in enumerate(filtered_pairs_of_ids):
        row = locu_id_to_index[locu_id]
        col = four_id_to_index[four_id]
        matching_weights[row][col] = prob[k][1]

    # lap() is given the negated weights, so it maximises total weight.
    max_matching = hungarian.lap(-matching_weights)[0]

    best_matching = {}
    for row, col in enumerate(max_matching):
        pair = (locu_index_to_id[row], four_index_to_id[col])
        best_matching[pair] = matching_weights[row][col]
    return best_matching
def process(self, matrixOrig):
    """Keep only the assigned cell of each row of ``matrixOrig``.

    The matrix is made square with ``tds.squarify(matrixOrig, 1.)`` and
    solved with ``hungarian.lap``.  A ``LinedMatrix`` built from the
    larger and smaller of the original dimensions with fill value 1.
    receives, for each of the first ``min dimension`` rows, the value of
    the assigned cell, written at flat offset
    ``column + row * lA_void._width``.

    The row-to-column assignment is stored in ``self._context["pairs"]``.
    """
    # lDim[0] is the smaller and lDim[1] the larger original dimension.
    lDim = sorted([matrixOrig._height, matrixOrig._width])
    matrixOrig = tds.squarify(matrixOrig, 1.)
    matrix = matrixOrig.getMatrix()
    # The unused negated copy of the matrix was dropped here.
    pairs = hungarian.lap(matrix)[0]

    lA_void = LinedMatrix(lDim[1], lDim[0], 1.)
    for i in range(lDim[0]):
        ni = pairs[i] + i * lA_void._width
        lA_void.data[ni] = matrix[i][pairs[i]]

    self._context["pairs"] = pairs
    return lA_void
def graph_node_distance(g1, g2):
    """Return the node-matching distance between graphs ``g1`` and ``g2``.

    Returns 10000 when either graph is empty or when the smaller graph
    has fewer than half as many nodes as the larger one.  Otherwise a
    square cost matrix over node features (sized to the larger graph) is
    solved with ``hungarian.lap`` and scored by ``caldistance``.  When
    ``USE_WEIGHT`` is set, each cost is scaled by the mean of the two
    node weights.
    """
    MAX_VALUE = 10000
    g1_indexs = list(g1.nodes())
    g2_indexs = list(g2.nodes())

    sizes = (len(g1), len(g2))
    matrix_len = max(sizes)
    min_len = min(sizes)
    if min_len == 0:
        return MAX_VALUE
    if min_len * 1.0 / matrix_len < 0.5:
        return MAX_VALUE

    cost_matrix = []
    for row_id in range(matrix_len):
        row = []
        for column_id in range(matrix_len):
            src = obtain_node_feature(g1, g1_indexs, row_id)
            dst = obtain_node_feature(g2, g2_indexs, column_id)
            cost = cal_nodecost(src, dst)
            if USE_WEIGHT:
                src_weight = obtain_node_weight(g1, g1_indexs, row_id)
                dst_weight = obtain_node_weight(g2, g2_indexs, column_id)
                cost = (cost) * (src_weight + dst_weight) / 2
            row.append(cost)
        cost_matrix.append(row)

    if not cost_matrix:
        return MAX_VALUE
    mapping = hungarian.lap(cost_matrix)
    return caldistance(mapping, cost_matrix)
def getWassersteinDist(S, T):
    """
    Match two persistence diagrams and return the matching cost.

    The first two columns of S and T are taken as the persistence point
    coordinates for the diagonal matching; the pairwise distances use the
    arrays as given.

    Returns (list of (row, column) matched indices, total cost,
    cross-similarity matrix as a nested list).
    """
    import hungarian  # Requires having compiled the library

    n_s, n_t = S.shape[0], T.shape[0]
    # An empty diagram is replaced by a single point at the origin.
    if n_s == 0:
        S = np.array([[0, 0]])
        n_s = 1
    if n_t == 0:
        T = np.array([[0, 0]])
        n_t = 1
    point_dists = sklearn.metrics.pairwise.pairwise_distances(S, T)

    # Rotate the diagrams by 45 degrees so that the second coordinate is
    # the straight-line distance to the diagonal.
    c = np.cos(np.pi / 4)
    s = np.sin(np.pi / 4)
    rot = np.array([[c, -s], [s, c]])
    S = S[:, 0:2].dot(rot)
    T = T[:, 0:2].dot(rot)

    total = n_s + n_t
    D = np.zeros((total, total))
    D[:n_s, :n_t] = point_dists
    # Point-to-diagonal blocks: off-diagonal entries take the current
    # maximum of D, the diagonal holds each point's distance to the
    # diagonal.  The lower-right block stays zero.
    s_block = np.max(D) * np.ones((n_s, n_s))
    np.fill_diagonal(s_block, S[:, 1])
    D[:n_s, n_t:] = s_block
    t_block = np.max(D) * np.ones((n_t, n_t))
    np.fill_diagonal(t_block, T[:, 1])
    D[n_s:, :n_t] = t_block
    D = D.tolist()

    # Run the hungarian algorithm and add up the matched costs.
    row_to_col = hungarian.lap(D)[0]
    matchidx = list(enumerate(row_to_col))
    matchdist = 0
    for i, j in matchidx:
        matchdist += D[i][j]
    return (matchidx, matchdist, D)
def graph_edge_distance(g1, g2):
    """Return the edge-matching distance between graphs ``g1`` and ``g2``.

    Returns 0 when either graph has no edges and 100 when the smaller
    edge list has fewer than half as many entries as the larger one.
    Otherwise a square cost matrix over edge features (sized to the larger
    edge list) is solved with ``hungarian.lap`` and scored by
    ``caldistance``.  A cell whose source or destination feature is
    ``None`` costs 0.  When ``USE_WEIGHT`` is set, each real cost is
    scaled by the mean of the two edge weights.
    """
    g1_indexs = list(g1.edges())
    g2_indexs = list(g2.edges())
    # Size the problem by the number of edges.  len(g1) / len(g2) were
    # used before, although rows and columns index the edge lists.
    matrix_len = max(len(g1_indexs), len(g2_indexs))
    min_len = min(len(g1_indexs), len(g2_indexs))
    if min_len == 0:
        return 0
    if min_len * 1.0 / matrix_len < 0.5:
        return 100

    cost_matrix = []
    for row_id in range(matrix_len):
        row = []
        for column_id in range(matrix_len):
            src = obtain_edge_feature(g1, g1_indexs, row_id)
            dst = obtain_edge_feature(g2, g2_indexs, column_id)
            if src is None or dst is None:
                # A zero cost stays zero under weighting, so the weights
                # are not looked up for this cell.
                cost = 0
            else:
                cost = cal_edgecost(src, dst)
                if USE_WEIGHT:
                    src_weight = obtain_edge_weight(g1, g1_indexs, row_id)
                    dst_weight = obtain_edge_weight(g2, g2_indexs,
                                                    column_id)
                    cost = (cost) * (src_weight + dst_weight) / 2
            row.append(cost)
        cost_matrix.append(row)

    # matrix_len >= 1 here, so the former empty-matrix check was dead.
    mapping = hungarian.lap(cost_matrix)
    return caldistance(mapping, cost_matrix)
def graph_node_distance(g1, g2):
    """Return the node-matching distance between graphs ``g1`` and ``g2``.

    Returns 10 when both graphs are empty and 100 when the smaller graph
    has fewer than half as many nodes as the larger one.  Otherwise a
    square cost matrix over nodes (sized to the larger graph) is solved
    with ``hungarian.lap`` and scored by ``caldistance``.
    """
    # Materialise the node containers so that they can be indexed.
    g1_indexs = list(g1.nodes())
    g2_indexs = list(g2.nodes())
    matrix_len = max(len(g1), len(g2))
    if matrix_len == 0:
        # Both graphs empty.  This used to divide by zero below, before
        # the empty-matrix check that returns 10 could run.
        return 10
    min_len = min(len(g1), len(g2))
    if min_len * 1.0 / matrix_len < 0.5:
        return 100

    cost_matrix = []
    for row_id in range(matrix_len):
        row = []
        for column_id in range(matrix_len):
            src = obtain_node(g1, g1_indexs, row_id)
            dst = obtain_node(g2, g2_indexs, column_id)
            row.append(cal_nodecost(src, dst))
        cost_matrix.append(row)

    mapping = hungarian.lap(cost_matrix)
    return caldistance(mapping, cost_matrix)
def match_synapses(connections, gt_connections):
    '''Match ground-truth synapses against detected synapses

    Both arguments are mappings with a "synapse_center" entry holding
    "x", "y" and "z" sequences, plus "neuron_1" and "neuron_2" sequences.

    Returns the count table calculation from table 2 of the paper: a
    dense array whose entry [d, gt] counts pairings of detected neuron id
    d with ground-truth neuron id gt.  Index 0 stands for "no partner"
    (insertions on the ground-truth axis, deletions on the detected axis).
    '''
    #
    # Make a matrix of distances and augment the matrix with big numbers
    # so that it is square. Things that match the augmented side get the
    # booby prize of being inserts or deletes.
    #
    # The NDI paper says that the max distance is 300 nm
    #
    x = np.array(connections["synapse_center"]["x"])
    y = np.array(connections["synapse_center"]["y"])
    z = np.array(connections["synapse_center"]["z"])
    n1 = np.array(connections["neuron_1"])
    n2 = np.array(connections["neuron_2"])
    # The ground-truth columns are sliced and fancy-indexed below, so
    # they must be arrays just like the detected ones.
    gtx = np.array(gt_connections["synapse_center"]["x"])
    gty = np.array(gt_connections["synapse_center"]["y"])
    gtz = np.array(gt_connections["synapse_center"]["z"])
    gtn1 = np.array(gt_connections["neuron_1"])
    gtn2 = np.array(gt_connections["neuron_2"])
    n_detected = len(x)
    n_gt = len(gtx)
    #
    # First, we set the matrix to have the augmented value
    #
    side = n_detected + n_gt
    matrix = np.ones((side, side), int) * MAX_SYNAPSE_DISTANCE
    #
    # Then we place the distances within
    #
    matrix[:n_detected, :n_gt] = np.sqrt(
        ((x[:, np.newaxis] - gtx[np.newaxis, :]) * xy_nm) ** 2 +
        ((y[:, np.newaxis] - gty[np.newaxis, :]) * xy_nm) ** 2 +
        ((z[:, np.newaxis] - gtz[np.newaxis, :]) * z_nm) ** 2).astype(int)
    #
    # Run the hungarian
    #
    detected_id, gt_id = hungarian.lap(matrix)
    #
    # Get rid of the augmented portion of the matches
    #
    detected_id = detected_id[:n_detected]
    gt_id = gt_id[:n_gt]
    #
    # If detected_id is in the augmented range, it's an insertion and goes
    # into row 0
    #
    insertion_idxs = np.where(detected_id >= n_gt)[0]
    insertion_ids = np.hstack((n1[insertion_idxs], n2[insertion_idxs]))
    #
    # Likewise with gt_id and deletions
    #
    deletion_idxs = np.where(gt_id >= n_detected)[0]
    deletion_ids = np.hstack((gtn1[deletion_idxs], gtn2[deletion_idxs]))
    #
    # The matches: detected synapses assigned to a real ground-truth
    # column.  This is the complement of the insertion test above; the
    # previous comparison against len(x) could count insertions as
    # matches and index past the end of gtn1 / gtn2.
    #
    idx = np.where(detected_id < n_gt)[0]
    n1_ids = n1[idx]
    n2_ids = n2[idx]
    gtn1_ids = gtn1[detected_id[idx]]
    gtn2_ids = gtn2[detected_id[idx]]
    #
    # And put it all together into the matrix to return
    #
    d = np.hstack((insertion_ids,
                   np.zeros(len(deletion_ids), int),
                   n1_ids, n2_ids))
    gt = np.hstack((np.zeros(len(insertion_ids), int),
                    deletion_ids,
                    gtn1_ids, gtn2_ids))
    matrix = coo_matrix((np.ones(len(d)), (d, gt)))
    matrix.sum_duplicates()
    return matrix.toarray()
def find_matched_pairs(cost_matrix):
    """Apply the Hungarian method to ``cost_matrix``.

    Returns the row assignments (the first of the two arrays produced by
    ``hungarian.lap``); the column assignments are discarded.
    """
    row_assigns, _column_assigns = hungarian.lap(cost_matrix)
    return row_assigns
import numpy
import hungarian

# Self-check for hungarian.lap on a fixed 8x8 cost matrix.
# ``inf`` is a large finite cost placed on the main diagonal.
inf = 1000
a = numpy.array([
    [inf, 2, 11, 10, 8, 7, 6, 5],
    [6, inf, 1, 8, 8, 4, 6, 7],
    [5, 12, inf, 11, 8, 12, 3, 11],
    [11, 9, 10, inf, 1, 9, 8, 10],
    [11, 11, 9, 4, inf, 2, 10, 9],
    [12, 8, 5, 2, 11, inf, 11, 9],
    [10, 11, 12, 10, 9, 12, inf, 3],
    [10, 10, 10, 10, 6, 3, 1, inf],
])

answers = hungarian.lap(a)

# answers[0]: for each row, the matched column.
print('For each row, matching column index:', answers[0])
assert numpy.array_equal([1, 2, 0, 4, 5, 3, 7, 6], answers[0])
points0 = list(enumerate(answers[0]))
print('Matching pairs, sorted by row:', points0)

# answers[1]: for each column, the matched row.
print('For each column, matching row index:', answers[1])
assert numpy.array_equal([2, 0, 1, 5, 3, 4, 7, 6], answers[1])
points1 = [(row, col) for col, row in enumerate(answers[1])]
print('Matching pairs, sorted by col:', points1)

# Both views of the assignment must give the same total cost.
sum0 = sum(a[numpy.arange(len(answers[0])), answers[0]])
sum1 = sum(a[answers[1], numpy.arange(len(answers[1]))])
print('Cost of match:', sum1)
assert sum0 == 17
assert sum1 == 17
def findBestPermutationRBMol_list(coords1, coords2, mol, mollist):
    """
    find the permutation of the molecules which minimizes the distance
    between the two coordinates

    ``coords1`` / ``coords2`` are flat arrays: the first half holds three
    centre-of-mass values per molecule, the second half three angle-axis
    values per molecule.  Only the molecules listed in ``mollist`` are
    permuted; pair costs come from ``molmolMinSymDist``.

    Returns ``(dist, coords1, coords2)`` with ``coords2`` permuted.  If
    neither ``hungarian`` nor ``munkres`` can be imported, an error is
    printed and the unpermuted coordinates are returned with their plain
    Euclidean distance.
    """
    # Explicit floor division: the result is used as a slice offset.
    nmol = len(coords1) // 3 // 2
    nperm = len(mollist)
    coords2old = coords2.copy()
    #########################################
    # create the cost matrix
    #########################################
    cost = np.zeros([nperm, nperm], np.float64)
    for i in range(nperm):
        imol = mollist[i]
        com1 = coords1[imol * 3:imol * 3 + 3]
        aa1 = coords1[3 * nmol + imol * 3:3 * nmol + imol * 3 + 3]
        for j in range(nperm):
            jmol = mollist[j]
            com2 = coords2[jmol * 3:jmol * 3 + 3]
            aa2 = coords2[3 * nmol + jmol * 3:3 * nmol + jmol * 3 + 3]
            cost[j, i], newaa = molmolMinSymDist(com1, aa1, com2, aa2, mol)
    #########################################
    # run the hungarian algorithm
    #########################################
    try:
        # use the hungarian package which is compiled
        import hungarian
        # Solve on a copy in case lap() modifies its argument in place;
        # ``cost`` is read again when the permutation is applied.
        newind1 = hungarian.lap(cost.copy())
        newind = [(i, j) for i, j in enumerate(newind1[0])]
    except ImportError:
        try:
            # use the munkres package
            # convert cost matrix to a form used by munkres
            from munkres import Munkres
            matrix = cost.tolist()
            m = Munkres()
            newind = m.compute(matrix)
        except ImportError:
            print("ERROR: findBestPermutation> You must install either the hungarian or the munkres package to use the Hungarian algorithm")
            dist = np.linalg.norm(coords1 - coords2)
            return dist, coords1, coords2
    #########################################
    # apply the permutation
    #########################################
    costnew = 0.
    coords2 = coords2old.copy()
    for (iold, inew) in newind:
        costnew += cost[iold, inew]
        if iold != inew:
            moliold = mollist[iold]
            molinew = mollist[inew]
            # change the com coords
            coords2[molinew * 3:molinew * 3 + 3] = \
                coords2old[moliold * 3:moliold * 3 + 3]
            # change the aa coords
            coords2[3 * nmol + molinew * 3:3 * nmol + molinew * 3 + 3] = \
                coords2old[3 * nmol + moliold * 3:3 * nmol + moliold * 3 + 3]
    dist = np.sqrt(costnew)
    return dist, coords1, coords2
def match_synapses_by_distance(gt, detected, xy_nm, z_nm, max_distance):
    '''Pair ground-truth and detected synapses by centroid distance

    :param gt: a label volume of the ground-truth synapses
    :param detected: a label volume of the detected synapses
    :param xy_nm: size of voxel in the x/y direction
    :param z_nm: size of voxel in the z direction
    :param max_distance: maximum allowed distance for a match

    A centroid is computed for every non-zero label in both volumes and the
    pairwise centroid distances (scaled by the voxel size) form a cost matrix.
    Pairs farther apart than max_distance get an infinite cost. Every object
    is also offered a "match nothing" alternative costing max_distance, and
    the augmented square matrix is handed to hungarian.lap.

    Returns two vectors indexed by label: for each gt label the matching
    detected label, and for each detected label the matching gt label, with
    zero meaning "no match".
    '''
    z, y, x = np.mgrid[0:gt.shape[0], 0:gt.shape[1], 0:gt.shape[2]]

    def label_centroids(labels):
        # Voxel count per label; label 0 is background and is zeroed out so
        # it never appears in the map of present labels.
        flat = labels.flatten()
        counts = np.bincount(flat)
        counts[0] = 0
        present = np.where(counts > 0)[0]
        xc = np.bincount(flat, x.flatten())[present] / counts[present]
        yc = np.bincount(flat, y.flatten())[present] / counts[present]
        zc = np.bincount(flat, z.flatten())[present] / counts[present]
        return len(counts), present, xc, yc, zc

    n_gt_orig, gt_map, xc_gt, yc_gt, zc_gt = label_centroids(gt)
    n_gt = len(gt_map)
    n_d_orig, d_map, xc_d, yc_d, zc_d = label_centroids(detected)
    n_d = len(d_map)

    # Pairwise centroid offsets in nm, gt along rows and detected along columns
    dx = (xc_gt[:, np.newaxis] - xc_d[np.newaxis, :]) * xy_nm
    dy = (yc_gt[:, np.newaxis] - yc_d[np.newaxis, :]) * xy_nm
    dz = (zc_gt[:, np.newaxis] - zc_d[np.newaxis, :]) * z_nm
    matrix = np.sqrt(dx**2 + dy**2 + dz**2)
    matrix[matrix > max_distance] = np.inf
    #
    # The alternative is that the thing matches nothing. We augment
    # the matrix with alternatives for each object, for instance:
    #
    #   DA3  inf inf  x   0   0
    #   DA2  inf  x  inf  0   0
    #   DA1   x  inf inf  0   0
    #   G2    y   y   y  inf  x
    #   G1    y   y   y   x  inf
    #         D1  D2  D3 GA1 GA2
    #
    size = n_gt + n_d
    big_matrix = np.full((size, size), np.inf, dtype=np.float32)
    big_matrix[:n_gt, :n_d] = matrix
    # alternative-vs-alternative pairings are free
    big_matrix[n_gt:, n_d:] = 0
    d_idx = np.arange(n_d)
    gt_idx = np.arange(n_gt)
    big_matrix[n_gt + d_idx, d_idx] = max_distance
    big_matrix[gt_idx, n_d + gt_idx] = max_distance
    #
    # Solve it, then drop the rows / columns belonging to the alternatives
    #
    d_match, gt_match = hungarian.lap(big_matrix)
    d_match = d_match[:n_gt]
    gt_match = gt_match[:n_d]
    #
    # A gt object has a real match when its column is not an alternative
    #
    gt_winners = np.nonzero(d_match < n_d)[0]
    gt_result = np.zeros(n_gt_orig, int)
    gt_result[gt_map[gt_winners]] = d_map[d_match[gt_winners]]
    #
    # Same for the detected objects
    #
    d_winners = np.nonzero(gt_match < n_gt)[0]
    d_result = np.zeros(n_d_orig, int)
    d_result[d_map[d_winners]] = gt_map[gt_match[d_winners]]
    return gt_result, d_result
#!/usr/bin/env python
## Thanks to Dr N.D. van Foreest for providing this example code. ##
"""
The cost matrix is based on Balas and Toth, 1985, Branch and bound
methods, in Lawler, E.L, et al., The TSP, John Wiley & Sons,
Chichester, pp 361--401.
"""
import numpy
import hungarian

# Large finite cost placed on the diagonal of the cost matrix.
inf = 1000
a = numpy.array([[inf, 2, 11, 10, 8, 7, 6, 5],
                 [6, inf, 1, 8, 8, 4, 6, 7],
                 [5, 12, inf, 11, 8, 12, 3, 11],
                 [11, 9, 10, inf, 1, 9, 8, 10],
                 [11, 11, 9, 4, inf, 2, 10, 9],
                 [12, 8, 5, 2, 11, inf, 11, 9],
                 [10, 11, 12, 10, 9, 12, inf, 3],
                 [10, 10, 10, 10, 6, 3, 1, inf]])

# Print the first element of the solver result.  The call form with a single
# argument is valid on both Python 2 and Python 3.
print(hungarian.lap(a)[0])
def findBestPermutationListHungarian(X1, X2, atomlist=None):
    """
    For a given set of positions X1 and X2, find the best permutation of the
    atoms in X2.

    Use an implementation of the Hungarian Algorithm in the Python package
    index (PyPi) called munkres (another name for the algorithm).  The
    hungarian algorithm time scales as O(n^3), much faster than the O(n!) from
    looping through all permutations.

    http://en.wikipedia.org/wiki/Hungarian_algorithm
    http://pypi.python.org/pypi/munkres/1.0.5.2

    another package, hungarian, implements the same routine in compiled C
    http://pypi.python.org/pypi/hungarian/
    When I first downloaded this package I got segfaults.  The problem for me
    was casting an integer pointer as (npy_intp *).  I may add the corrected
    version to pele at some point

    Parameters
    ----------
    X1, X2 : numpy arrays
        Positions; each is reshaped to ``(-1, 3)`` internally.
    atomlist : sequence of int, optional
        Indices of the atoms that may be permuted.  Defaults to all
        ``len(X1) // 3`` atoms.

    Returns
    -------
    (dist, X1, X2new)
        ``X1`` and the permuted copy of ``X2`` as flat arrays, and the norm
        of their difference.
    """
    # Floor division so that range() receives an int on Python 3.
    nsites = len(X1) // 3
    # Identity test: ``atomlist == None`` is evaluated element-wise when an
    # ndarray is passed, which cannot be used as a single truth value.
    if atomlist is None:
        atomlist = range(nsites)
    atomlist = np.array(atomlist)

    #########################################
    # create the cost matrix
    # cost[j,i] = (X1(i,:) - X2(j,:))**2
    #########################################
    X1 = X1.reshape([-1, 3])
    X2 = X2.reshape([-1, 3])
    cost = makeCostMatrix(X1, X2, atomlist)

    #########################################
    # run the hungarian algorithm
    #########################################
    newind1 = hungarian.lap(cost)
    perm = newind1[1]
    # note: the hungarian algorithm changes the cost matrix.  I'm not sure
    # why, and it may be a bug, but the indices it returns are still correct

    #########################################
    # apply the permutation
    #########################################
    newperm = np.array(atomlist[perm])
    X2new = np.copy(X2)
    X2new[atomlist, :] = X2[newperm, :]

    X1 = X1.reshape(-1)
    X2new = X2new.reshape(-1)
    dist = np.linalg.norm(X1 - X2new)
    return dist, X1, X2new
def kuhn_munkres(traj1, traj2):
    """Return the lowest Kabsch RMSD between two structures after atom
    reassignment.

    The coordinates of ``traj2`` are tried under every combination of the 6
    axis swaps and 8 axis reflections defined below.  For each combination,
    atoms carrying the same label are re-paired with ``hungarian.lap`` on a
    matrix of pairwise distances, one label at a time, and the Kabsch RMSD
    against ``traj1`` is recorded.  The smallest recorded RMSD is returned.

    :param traj1: object exposing ``n_atoms``, ``xyz`` and ``top.atom(i)``
    :param traj2: object exposing the same attributes
    :return: the minimum RMSD found
    """
    # Atom coordinates and labels.  Each trajectory is reshaped with its own
    # atom count.
    a_coords = np.reshape(traj1.xyz, (traj1.n_atoms, 3))
    b_coords = np.reshape(traj2.xyz, (traj2.n_atoms, 3))
    a_labels = [str(traj1.top.atom(i))[2:] for i in range(traj1.n_atoms)]
    b_labels = [str(traj2.top.atom(i))[2:] for i in range(traj2.n_atoms)]

    # RMSD before any reordering.  The value is only of interest for debug
    # output; the call is kept because kabsch_rmsd is defined elsewhere and
    # its side effects, if any, are not visible from here.
    init_rmsd_unsorted = kabsch_rmsd(a_coords, b_coords)
    #print('Unsorted atom RMSD: %.4f' % init_rmsd_unsorted)

    # Sort the atom labels and coords
    a_labels, a_coords = sort(a_labels, a_coords)
    b_labels, b_coords = sort(b_labels, b_coords)

    # Unique atom labels.  A real list is needed because the labels are
    # accessed by position below (a dict keys view is not indexable on
    # Python 3).
    uniq = list(Counter(a_labels))
    n_types = len(uniq)
    perm = {label: [] for label in uniq}

    # Generate hashtables for traj1 and traj2 and remove COM
    a_Coords, a_Indices = build_hashtable(a_labels, a_coords, uniq, 'a_')
    b_Coords, b_Indices = build_hashtable(b_labels, b_coords, uniq, 'b_')
    first = uniq[0]
    A = np.array(a_Coords[first])
    B = np.array(b_Coords[first])
    A -= sum(A) / len(A)
    B -= sum(B) / len(B)

    # Define swaps and reflections for initial B (coordinates of traj2)
    swaps = [(0, 1, 2), (0, 2, 1), (1, 0, 2),
             (1, 2, 0), (2, 0, 1), (2, 1, 0)]
    reflects = [(1, 1, 1), (-1, 1, 1), (1, -1, 1), (1, 1, -1),
                (-1, -1, 1), (-1, 1, -1), (1, -1, -1), (-1, -1, -1)]
    B_transformed = [[transform_coords(B, s, r), s, r]
                     for s in swaps for r in reflects]

    # Perform the assignment on each set of transformed coordinates
    rmsds = []
    for candidate, swap, reflect in B_transformed:
        # Seed the orientation using the first atom type only.
        cost_matrix = np.array(
            [[np.linalg.norm(a - b) for b in candidate] for a in A])
        LAP = hungarian.lap(cost_matrix)
        # LAP[0][j] is the partner of row j, so the row-ordered list of
        # partners is simply LAP[0] itself.
        perm[first] = list(LAP[0])
        b_perm = permute_atoms(b_coords, perm[first], b_Indices[first])
        b_trans = transform_coords(b_perm, swap, reflect)

        # Refine one atom type at a time, always starting from the most
        # recently permuted coordinates.
        for label in uniq[:n_types]:
            b_Coords[label] = parse_atoms(b_labels, b_trans, label)
            cost_matrix = np.array(
                [[np.linalg.norm(a - b) for b in np.array(b_Coords[label])]
                 for a in np.array(a_Coords[label])])
            LAP = hungarian.lap(cost_matrix)
            perm[label] = list(LAP[0])
            b_final = permute_atoms(b_trans, perm[label], b_Indices[label])
            b_trans = b_final

        rmsds.append(
            [kabsch_rmsd(a_coords, b_final), swap, reflect, b_final])

    rmsds = sorted(rmsds, key=lambda x: x[0])
    #print('Hungarian Algorithm RMSD: %.4f' % rmsds[0][0])
    return rmsds[0][0]
def match_track(self, detections, frame, frame_index):
    """Assign detections to tracks for one frame and update cluster outlines.

    Existing tracks are matched to ``detections`` by solving an assignment
    problem (``hungarian.lap``) on the matrix of distances between each
    track's ``prediction`` and each detection.  Matches costing more than
    ``self.dist_thresh`` are rejected, tracks that stay unmatched for more
    than ``self.max_frames_to_skip`` frames are moved to ``self.del_tracks``,
    and every unmatched detection starts a new track.  On frame 0 the track
    positions are clustered with DBSCAN; on later frames new tracks are
    attached to the first cluster whose previous-frame contour they touch.
    Finally each non-empty cluster's convex hull is drawn on ``frame`` and
    recorded in the per-cluster matrices.

    :param detections: sequence of detections; ``d[0][0]`` and ``d[1][0]``
        are read as x and y
    :param frame: image that cluster hulls and labels are drawn on
    :param frame_index: index of the current frame

    NOTE(review): the nesting in this method was reconstructed from a
    whitespace-collapsed copy of the source; confirm it against the original
    file.
    """
    # Create tracks if no tracks vector found
    if len(self.tracks) == 0:
        for detection in detections:
            self.tracks.append(CellTrack(detection, self.trackIdCount))
            self.trackIdCount += 1

    N = len(self.tracks)
    M = len(detections)
    D = max(N, M)
    # Square cost matrix padded with a large cost.  The fill value is a float
    # so that the distances written below are not truncated to integers.
    cost = np.full([D, D], 2000.0)
    predit = np.zeros([N, 2])
    for i in range(N):
        predit[i][0] = self.tracks[i].prediction[0][0]
        predit[i][1] = self.tracks[i].prediction[1][0]
    det = np.zeros([M, 2])
    for i in range(M):
        det[i][0] = detections[i][0][0]
        det[i][1] = detections[i][1][0]
    cost[0:N, 0:M] = distance_matrix(predit, det)

    # assignment[i] is the detection index given to track i, or -1
    assignment = [-1] * N
    answers = hungarian.lap(cost)
    if N > M:
        # more tracks than detections: read the track chosen per detection
        for i in range(M):
            assignment[answers[1][i]] = i
    else:
        for i in range(N):
            assignment[i] = answers[0][i]

    # Reject matches that are too far away and count skipped frames
    for i in range(len(assignment)):
        if (assignment[i] == -1
                or cost[i][assignment[i]] > self.dist_thresh):
            assignment[i] = -1
            self.tracks[i].skipped_frames += 1
        else:
            self.tracks[i].skipped_frames = 0

    # Retire tracks that have been unmatched for too long
    i = 0
    while i < len(self.tracks):
        if self.tracks[i].skipped_frames > self.max_frames_to_skip:
            self.del_tracks.append(self.tracks[i])
            del self.tracks[i]
            del assignment[i]
        else:
            i += 1

    un_assigned_detects = [
        i for i in range(len(detections)) if i not in assignment]

    points = []
    cluster_points = []
    if frame_index > 0:
        cluster_points = [[] for _ in range(len(self.clusters))]

    # Start a new track for every unassigned detection
    for det_index in un_assigned_detects:
        track = CellTrack(detections[det_index], self.trackIdCount)
        self.tracks.append(track)
        # Draw the detection as a circle on a blank mask and test it against
        # the filled previous-frame contour of each cluster.
        frame1 = np.zeros([1328, 1048])
        x = int(detections[det_index][0][0])
        y = int(detections[det_index][1][0])
        cv2.circle(frame1, (x, y), 10, (255, 255, 255), 1)
        for c in range(len(self.clusters)):
            frame2 = np.zeros([1328, 1048])
            if len(self.clusterContour_img_mat[c][frame_index - 1]) > 0:
                cv2.fillPoly(
                    frame2,
                    pts=self.clusterContour_img_mat[c][frame_index - 1][0],
                    color=(255, 255, 255))
            frame3 = np.logical_and(frame1, frame2)
            if True in frame3:
                four_corner = 3
                cluster_points[c].append([x - four_corner, y - four_corner])
                cluster_points[c].append([x - four_corner, y + four_corner])
                cluster_points[c].append([x + four_corner, y - four_corner])
                cluster_points[c].append([x + four_corner, y + four_corner])
                track.cluster_index = c
                self.clusters[c].append(track.track_id)
                break
        self.trackIdCount += 1

    # Update the tracks that existed before this frame
    for i in range(len(assignment)):
        trk = self.tracks[i]
        if debug == 1:
            print('assign')
        if assignment[i] != -1:
            if debug == 1:
                print(assignment[i])
            trk.trace.append(detections[assignment[i]])
            trk.prediction = detections[assignment[i]]
        if len(trk.trace) > 0:
            x3 = trk.trace[-1][0][0]
            y3 = trk.trace[-1][1][0]
            if frame_index == 0:
                points.append([x3, y3])
            elif trk.cluster_index > -1:
                four_corner = 3
                target = cluster_points[trk.cluster_index]
                target.append([x3 - four_corner, y3 - four_corner])
                target.append([x3 + four_corner, y3 - four_corner])
                target.append([x3 - four_corner, y3 + four_corner])
                target.append([x3 + four_corner, y3 + four_corner])
        # Drop the oldest entries in one slice.  Deleting index 0, 1, 2, ...
        # from the shrinking list would skip entries and can run off the end.
        excess = len(trk.trace) - self.max_trace_length
        if excess > 0:
            del trk.trace[:excess]

    if frame_index == 0:
        # First frame: build the clusters from the track positions
        clustering = DBSCAN(eps=15, min_samples=4).fit(points)
        nCluster = max(clustering.labels_) + 1
        for _ in range(nCluster):
            cluster_points.append([])
            self.clusters.append([])
            self.cluster_img_mat.append(
                np.full(frame_number * 3, np.nan, dtype=float))
            self.cell_num_in_cluster.append(
                np.full(frame_number, np.nan, dtype=float))
            self.cell_num_in_cluster_x.append(
                np.full(frame_number, np.nan, dtype=float))
            self.clusterContour_img_mat.append(
                [[] for _ in range(frame_number)])
        self.clusters_eating_or_not = np.zeros(nCluster)
        for i in range(len(clustering.labels_)):
            label = clustering.labels_[i]
            if label > -1:
                px, py = points[i][0], points[i][1]
                cluster_points[label].append([px - 5, py - 5])
                cluster_points[label].append([px + 5, py - 5])
                cluster_points[label].append([px - 5, py + 5])
                cluster_points[label].append([px + 5, py + 5])
                self.clusters[label].append(i)
                self.tracks[i].cluster_index = label

    # Draw and record every non-empty cluster
    for i in range(len(cluster_points)):
        if len(cluster_points[i]) > 0:
            corners = np.array(cluster_points[i], np.int32)
            hull = cv2.convexHull(corners)
            (x, y), radius = cv2.minEnclosingCircle(
                np.array(cluster_points[i], np.int32))
            cv2.drawContours(frame, [hull], -1, (0, 0, 255), 1)
            cv2.putText(frame, str(i), (int(x + radius), int(y) + 3),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255))
            self.clusterContour_img_mat[i][frame_index].append([hull])
            self.cluster_img_mat[i][frame_index * 3 + 0] = x
            self.cluster_img_mat[i][frame_index * 3 + 1] = y
            self.cluster_img_mat[i][frame_index * 3 + 2] = radius
            # four corner points are stored per cell
            self.cell_num_in_cluster[i][frame_index] = len(
                cluster_points[i]) >> 2
            self.cell_num_in_cluster_x[i][frame_index] = frame_index
def match_track(self, detections, frame, frame_index):
    """Assign detections to tracks for one frame and record their positions.

    Existing tracks are matched to ``detections`` by solving an assignment
    problem (``hungarian.lap``) on the matrix of distances between each
    track's ``prediction`` and each detection; distances of 20 or more are
    replaced by the padding cost.  Matches costing more than
    ``self.dist_thresh`` are rejected, tracks that stay unmatched for more
    than ``self.max_frames_to_skip`` frames are moved to ``self.del_tracks``,
    and every unmatched detection starts a new track.  For each track with a
    non-empty trace the latest position and ratio are written to
    ``self.coordinate_matrix`` / ``self.alive_mat`` and the track id is drawn
    on ``frame``.

    :param detections: sequence of detections; ``d[0][0]``, ``d[1][0]`` and
        ``d[2][0]`` are read as x, y and ratio
    :param frame: image that track ids are drawn on
    :param frame_index: index of the current frame

    NOTE(review): the nesting in this method was reconstructed from a
    whitespace-collapsed copy of the source; confirm it against the original
    file.
    """
    if len(self.tracks) == 0:
        for detection in detections:
            self.tracks.append(CellTrack(detection, self.trackIdCount))
            one_row = np.zeros(array_size, dtype=float)
            one_row[:] = np.nan
            self.alive_mat.append(one_row)
            self.coordinate_matrix.append(
                np.zeros(array_size * 2, dtype=int))
            self.trackIdCount += 1

    N = len(self.tracks)
    M = len(detections)
    D = max(N, M)
    # Square cost matrix padded with a large cost.  The fill value is a float
    # so that the distances written below are not truncated to integers.
    cost = np.full([D, D], 2000.0)
    predit = np.zeros([N, 2])
    for i in range(N):
        predit[i][0] = self.tracks[i].prediction[0][0]
        predit[i][1] = self.tracks[i].prediction[1][0]
    det = np.zeros([M, 2])
    for i in range(M):
        det[i][0] = detections[i][0][0]
        det[i][1] = detections[i][1][0]
    cost_new = distance_matrix(predit, det)
    # Anything 20 or farther away is treated like padding
    cost_new = np.where(cost_new < 20, cost_new, 2000)
    cost[0:N, 0:M] = cost_new

    # assignment[i] is the detection index given to track i, or -1
    assignment = [-1] * N
    answers = hungarian.lap(cost)
    if N > M:
        # more tracks than detections: read the track chosen per detection
        for i in range(M):
            assignment[answers[1][i]] = i
    else:
        for i in range(N):
            assignment[i] = answers[0][i]

    # Reject matches that are too far away and count skipped frames
    for i in range(len(assignment)):
        if (assignment[i] == -1
                or cost[i][assignment[i]] > self.dist_thresh):
            assignment[i] = -1
            self.tracks[i].skipped_frames += 1
        else:
            self.tracks[i].skipped_frames = 0

    # Retire tracks that have been unmatched for too long
    i = 0
    while i < len(self.tracks):
        if self.tracks[i].skipped_frames > self.max_frames_to_skip:
            self.del_tracks.append(self.tracks[i])
            del self.tracks[i]
            del assignment[i]
        else:
            i += 1

    # Unassigned detections become new tracks.  Their index is appended to
    # assignment so that position k of assignment lines up with the track
    # appended at position k of self.tracks below.
    un_assigned_detects = []
    for i in range(len(detections)):
        if i not in assignment:
            un_assigned_detects.append(i)
            assignment.append(i)

    for det_index in un_assigned_detects:
        self.tracks.append(
            CellTrack(detections[det_index], self.trackIdCount))
        one_row = np.zeros(array_size, dtype=float)
        one_row[:] = np.nan
        self.alive_mat.append(one_row)
        self.coordinate_matrix.append(np.zeros(array_size * 2, dtype=int))
        self.trackIdCount += 1

    for i in range(len(assignment)):
        trk = self.tracks[i]
        if assignment[i] != -1:
            if debug == 1:
                print(assignment[i])
            trk.trace.append(detections[assignment[i]])
            trk.prediction = detections[assignment[i]]
        if len(trk.trace) > 0:
            last = trk.trace[-1]
            x3 = last[0][0]
            y3 = last[1][0]
            ratio = last[2][0]
            self.coordinate_matrix[trk.track_id][frame_index * 2] = x3
            self.coordinate_matrix[trk.track_id][frame_index * 2 + 1] = y3
            self.alive_mat[trk.track_id][frame_index] = ratio
            cv2.putText(frame, str(trk.track_id),
                        (int(x3 + 9) * scale, int(y3 + 4) * scale),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3 * scale,
                        (255, 127, 255), 2)
        # Drop the oldest entries in one slice.  Deleting index 0, 1, 2, ...
        # from the shrinking list would skip entries and can run off the end.
        excess = len(trk.trace) - self.max_trace_length
        if excess > 0:
            del trk.trace[:excess]
def match_synapses_by_overlap(gt, detected, min_overlap_pct,
                              min_gt_overlap_pct=None):
    '''Determine the best ground truth synapse for a detected synapse by overlap

    :param gt: the ground-truth labeling of the volume. 0 = not synapse,
             1+ are the labels for each synapse
    :param detected: the computer-generated labeling of the volume
    :param min_overlap_pct: the percentage of voxels that must overlap
             for the algorithm to consider two objects.
    :param min_gt_overlap_pct: the percentage of gt voxels that must overlap
             detected voxels to score an overlap. Defaults to min_overlap_pct

    The algorithm tries to maximize the number of overlapping voxels
    globally. It finds the overlap between each pair of gt and detected
    objects. The cost of a pair is the sum of the two areas minus their
    overlap; pairs whose overlap is below either minimum are scored as if
    they did not overlap at all. Each gt and each detected object also has an
    alternative of matching nothing, costing its own area minus a small eps,
    so that a non-overlapping pair never beats the alternatives.

    Return two vectors. The first vector is the matching label in d for each
    gt label (with zero for "not a match"). The second vector is the matching
    label in gt for each detected label.
    '''
    if min_gt_overlap_pct is None:
        min_gt_overlap_pct = min_overlap_pct
    gt_areas = np.bincount(gt.flatten())
    gt_areas[0] = 0
    #
    # gt_map is a map of the original label #s for the labels that are > 0
    # We work with the gt_map indices, not the label #s
    # gt_r_map goes the other way
    #
    gt_map = np.where(gt_areas > 0)[0]
    gt_r_map = np.zeros(len(gt_areas), int)
    n_gt = len(gt_map)
    gt_r_map[gt_map] = np.arange(n_gt)
    #
    # for detected...
    #
    d_areas = np.bincount(detected.flatten())
    d_areas[0] = 0
    d_map = np.where(d_areas > 0)[0]
    d_r_map = np.zeros(len(d_areas), int)
    n_d = len(d_map)
    d_r_map[d_map] = np.arange(n_d)
    #
    # With no objects on one side nothing can match, so every label maps to
    # zero.  Returning here also avoids taking the max of an empty selection
    # and building degenerate matrices further down.
    #
    if n_gt == 0 or n_d == 0:
        return np.zeros(len(gt_areas), int), np.zeros(len(d_areas), int)
    #
    # Get the matrix of correspondences: matrix[i, j] is the number of voxels
    # shared by gt object i and detected object j.
    #
    z, y, x = np.where((gt > 0) & (detected > 0))
    matrix = coo_matrix((np.ones(len(z), int),
                         (gt_r_map[gt[z, y, x]],
                          d_r_map[detected[z, y, x]])),
                        shape=(n_gt, n_d))
    matrix.sum_duplicates()
    matrix = matrix.toarray()
    #
    # Enforce minimum overlap.  Divide by a float so that integer areas and
    # percentages are not floor-divided on Python 2.
    #
    d_min_overlap = d_areas[d_map] * min_overlap_pct / 100.0
    gt_min_overlap = gt_areas[gt_map] * min_gt_overlap_pct / 100.0
    bad_gt, bad_d = np.where((matrix < gt_min_overlap[:, np.newaxis]) |
                             (matrix < d_min_overlap[np.newaxis, :]))
    matrix[bad_gt, bad_d] = 0
    #
    # The score of each cell is the number of voxels in both objects minus
    # the overlap between them.
    #
    matrix = (gt_areas[gt_map][:, np.newaxis] +
              d_areas[d_map][np.newaxis, :] -
              matrix)
    #
    # The alternative is that the thing matches nothing. We augment
    # the matrix with alternatives for each object, for instance:
    #
    #   DA3  inf inf  x   0   0
    #   DA2  inf  x  inf  0   0
    #   DA1   x  inf inf  0   0
    #   G2    y   y   y  inf  x
    #   G1    y   y   y   x  inf
    #         D1  D2  D3 GA1 GA2
    #
    # x is the area of the thing minus eps
    # y is the area of both things minus the overlap
    #
    big_matrix = np.zeros((n_gt + n_d, n_gt + n_d), np.float64)
    big_matrix[:n_gt, :n_d] = matrix
    big_matrix[n_gt:, :n_d] = np.inf
    big_matrix[:n_gt, n_d:] = np.inf
    #
    # The "eps" here is present to guarantee that the hungarian will take the
    # alternative if there is no overlap.
    #
    eps = np.finfo(np.float32).eps
    big_matrix[n_gt + np.arange(n_d), np.arange(n_d)] = d_areas[d_map] - eps
    big_matrix[np.arange(n_gt), n_d + np.arange(n_gt)] = \
        gt_areas[gt_map] - eps
    #
    # There's a problem with hungarian.lap where it can't solve if all
    # rows or columns has no members less than infinity. This would not be
    # a problem except that hungarian's "infinity" is 100,000. Rescale to
    # make every non-infinite element less than 100,000
    #
    hungarian_inf = 100000
    mmax = np.max(big_matrix[~np.isinf(big_matrix)])
    if mmax >= hungarian_inf:
        big_matrix = big_matrix / mmax
    #
    # Solve it
    #
    d_match, gt_match = hungarian.lap(big_matrix)
    #
    # Get rid of the augmented results
    #
    d_match = d_match[:n_gt]
    gt_match = gt_match[:n_d]
    #
    # The gt with matches in d have d not in the alternative range
    #
    gt_winners = np.where(d_match < n_d)[0]
    gt_result = np.zeros(len(gt_areas), int)
    gt_result[gt_map[gt_winners]] = d_map[d_match[gt_winners]]
    #
    # Same for d
    #
    d_winners = np.where(gt_match < n_gt)[0]
    d_result = np.zeros(len(d_areas), int)
    d_result[d_map[d_winners]] = gt_map[gt_match[d_winners]]

    return gt_result, d_result
# Command-line options: two input folders, each holding a topics.npy file,
# and an output folder that is created if missing.
argparser = ArgumentParser()
argparser.add_argument('--qtype', default="qb")
argparser.add_argument('--inputname', default="jeo_combined_naqt")
argparser.add_argument('--inputname2', default="naqt_combined_jeo")
argparser.add_argument('--inputname3', default="hungarian")
argparser.add_argument('--foldername', default="hungarian_results")
args = argparser.parse_args()

path = Path(args.foldername)
if not path.exists():
    path.mkdir()

#with open("%s/clues.pkl" % args.inputname, "rb") as f:
#clues = pickle.load(f)
topics = np.load("%s/topics.npy" % args.inputname)
topics2 = np.load("%s/topics.npy" % args.inputname2)
# Formatted explicitly so the output is the same on Python 2 and Python 3.
print("%s %s" % (topics.shape, topics2.shape))

# Pairwise cost: cross entropy taken in both directions and added up.
matrix = np.zeros((len(topics), len(topics2)))
for i in range(len(topics)):
    for j in range(len(topics2)):
        #matrix[i][j] = euclidean_distance(topics[i], topics2[j])
        matrix[i][j] = (cross_entropy(topics[i], topics2[j]) +
                        cross_entropy(topics2[j], topics[i]))

matches = hungarian.lap(matrix)
# Map each index into topics to its assigned index into topics2.
matches_dict = dict(enumerate(matches[0]))
print(matches_dict)

#matches = pickle.load(open("%s/matches.pkl" % args.inputname3, "rb"))
# Total cost of the chosen matching, recomputed from the topic vectors.
total = 0
for i, j in enumerate(matches[0]):
    total += (cross_entropy(topics[i], topics2[j]) +
              cross_entropy(topics2[j], topics[i]))
print(total)
def findBestPermutationRBMol_list(coords1, coords2, mol, mollist): """ find the permutation of the molecules which minimizes the distance between the two coordinates """ nmol = len(coords1) / 3 / 2 nperm = len(mollist) coords2old = coords2.copy() ######################################### # create the cost matrix ######################################### cost = np.zeros( [nperm,nperm], np.float64) for i in range(nperm): imol = mollist[i] com1 = coords1[ imol*3 : imol*3 + 3] aa1 = coords1[3*nmol + imol*3 : 3*nmol + imol*3 + 3] for j in range(nperm): jmol = mollist[j] com2 = coords2[ jmol*3 : jmol*3 + 3] aa2 = coords2[3*nmol + jmol*3 : 3*nmol + jmol*3 + 3] cost[j,i], newaa = molmolMinSymDist(com1, aa1, com2, aa2, mol) #convert cost matrix to a form used by munkres matrix = cost.tolist() ######################################### # run the hungarian algorithm ######################################### try: #use the hungarian package which is compiled import hungarian newind1 = hungarian.lap(cost) newind = [(i, j) for i,j in enumerate(newind1[0])] #print "hungari newind", newind except ImportError: try: #use the munkres package #convert cost matrix to a form used by munkres from munkres import Munkres matrix = cost.tolist() m = Munkres() newind = m.compute(matrix) #print "munkres newind", newind except ImportError: print "ERROR: findBestPermutation> You must install either the hungarian or the munkres package to use the Hungarian algorithm" #raise Exception("ERROR: findBestPermutation> You must install either the hungarian or the munkres package to use the Hungarian algorithm") dist = np.linalg.norm( coords1 - coords2 ) return dist, coords1, coords2 ######################################### # apply the permutation ######################################### costnew = 0.; coords2 = coords2old.copy() for (iold, inew) in newind: costnew += cost[iold,inew] if iold != inew: moliold = mollist[iold] molinew = mollist[inew] #print "%4d -> %4d" % (moliold, molinew) #change the com 
coords coords2[ molinew*3 : molinew*3+3] = coords2old[ moliold*3 : moliold*3+3] #change the aa coords coords2[3*nmol + molinew*3 : 3*nmol + molinew*3+3] = coords2old[3*nmol + moliold*3 : 3*nmol + moliold*3+3] dist = np.sqrt(costnew) return dist, coords1, coords2
Chischester, pp 361--401. """ from __future__ import print_function import numpy import hungarian inf = 1000 a = numpy.array([[inf, 2, 11, 10, 8, 7, 6, 5], [6, inf, 1, 8, 8, 4, 6, 7], [5, 12, inf, 11, 8, 12, 3, 11], [11, 9, 10, inf, 1, 9, 8, 10], [11, 11, 9, 4, inf, 2, 10, 9], [12, 8, 5, 2, 11, inf, 11, 9], [10, 11, 12, 10, 9, 12, inf, 3], [10, 10, 10, 10, 6, 3, 1, inf]]) answers = hungarian.lap(a) print('For each row, matching column index:', answers[0]) assert (numpy.array_equal([1, 2, 0, 4, 5, 3, 7, 6], answers[0])) points0 = list(zip(range(len(answers[0])), answers[0])) print('Matching pairs, sorted by row:', points0) print('For each column, matching row index:', answers[1]) assert (numpy.array_equal([2, 0, 1, 5, 3, 4, 7, 6], answers[1])) points1 = list(zip(answers[1], range(len(answers[1])))) print('Matching pairs, sorted by col:', points1) sum0 = sum(a[range(len(answers[0])), answers[0]]) sum1 = sum(a[answers[1], range(len(answers[1]))]) print('Cost of match:', sum1) assert (sum0 == 17) assert (sum1 == 17)