def calculate_matches(list_of_matches: List[Match]):
    """Select the best one-to-one assignment among candidate matches.

    Matches are grouped into rows by post identifier, each row is ordered by
    fast-streamer identifier, and the Hungarian algorithm picks one match per
    row/column so that the summed ``total`` is maximal (totals are turned
    into costs with ``sys.maxsize - total``).

    Args:
        list_of_matches: candidate matches; each exposes ``post.identifier``,
            ``fast_streamer.identifier``, ``total`` and ``identifier``.

    Returns:
        dict: ``'aggregate'`` (result of ``calculate_aggregate_match_score``
        over the chosen totals), ``'processing'`` (elapsed seconds) and
        ``'matches'`` (the chosen match objects).
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    started = time.perf_counter()

    # Group (post id, streamer id, total, match id) tuples by post in one
    # pass. Row order is first-seen order; the previous set-based iteration
    # order was arbitrary.
    rows_by_post = {}
    for match in list_of_matches:
        entry = (match.post.identifier, match.fast_streamer.identifier,
                 match.total, match.identifier)
        rows_by_post.setdefault(entry[0], []).append(entry)
    rows = [sorted(group, key=itemgetter(1))
            for group in rows_by_post.values()]

    # Munkres minimises, so invert the totals into costs.
    cost_matrix = [[sys.maxsize - entry[2] for entry in row] for row in rows]

    # compute() pads non-square input itself; the former pad_matrix() call
    # discarded its return value and therefore had no effect.
    indices = munkres.Munkres().compute(cost_matrix)

    aggregate = calculate_aggregate_match_score(
        [rows[row][col][2] for row, col in indices])
    chosen_ids = [rows[row][col][3] for row, col in indices]

    # Index the input once instead of scanning it for every chosen id.
    # NOTE: this requires match identifiers to be hashable.
    matches_by_id = {}
    for match in list_of_matches:
        matches_by_id.setdefault(match.identifier, []).append(match)
    final_matches = []
    for chosen in chosen_ids:
        final_matches.extend(matches_by_id.get(chosen, []))

    total_time = time.perf_counter() - started
    return {
        'aggregate': aggregate,
        'processing': total_time,
        'matches': final_matches
    }
def maximize_trace(a):
    """
    Permute `a` so that its trace becomes maximal.

    `a` is first zero-padded (extra rows or columns) until it is square.
    A cost matrix is then built whose entry [j, i] is the squared distance
    between the j-th row of the identity matrix and the i-th row of `a`;
    the assignment that minimises this cost is handed to `permute_cols`.
    Returns the permuted version of (the padded) `a`.
    """
    n_rows, n_cols = a.shape[0], a.shape[1]
    # Pad with zeros so that `a` is square.
    if n_rows < n_cols:
        a = np.vstack((a, np.zeros((n_cols - n_rows, n_cols))))
    elif n_cols < n_rows:
        a = np.hstack((a, np.zeros((n_rows, n_rows - n_cols))))
    assert a.shape[0] == a.shape[1]

    size = a.shape[0]
    identity = np.eye(size, dtype=int)
    cost = np.zeros_like(a)
    for src in range(size):
        for target in range(size):
            cost[target, src] = sum((identity[target, :] - a[src, :])**2)

    solver = munkres.Munkres()
    assignment = solver.compute(cost)
    return permute_cols(a, assignment)
def evaluation_window(fact, detection, window=0, return_match=False):
    """Classify detections against facts, allowing a distance window.

    A bipartite graph G = (V + W, E) is built with V = fact and
    W = detection; an edge (v, w) exists when distance(v, w) <= window and
    costs distance(v, w). The minimum-cost maximum matching M of G gives:
        tp = |M|
        fp = |W| - |M|
        fn = |V| - |M|
        dis = C(M)/|M|, the average distance between matched pairs

    Args:
        fact (list of int): index or timestamp of the events to be detected
        detection (list of int): index or timestamp of the detected events
        window (int): maximum distance for a fact/detection pair to match
        return_match (bool): if true, add the matched index pairs
            [(fact_idx, detection_idx), ...] under 'match'

    Returns:
        dict: {'tp':int, 'fp':int, 'fn':int, 'precision':float,
        'recall':float, 'dis':float, 'match': list of tuple}. 'match' is
        always present (empty) on the early-return paths and otherwise only
        when return_match is set.
    """
    n_fact = len(fact)
    n_detection = len(detection)

    if n_fact == 0:
        return {'tp': None, 'fp': n_detection, 'fn': None,
                'precision': None, 'recall': None, 'dis': None, 'match': []}
    if n_detection == 0:
        return {'tp': 0, 'fp': 0, 'fn': n_fact,
                'precision': None, 'recall': 0, 'dis': None, 'match': []}

    # cost matrix of the bipartite graph
    cost_matrix = make_cost_matrix(fact, detection, window)

    # no edge at all between fact and detection
    has_edge = any(cost_matrix[i][j] != sys.maxsize
                   for i in range(n_fact) for j in range(n_detection))
    if not has_edge:
        return {'tp': 0, 'fp': n_detection, 'fn': n_fact,
                'precision': 0, 'recall': 0, 'dis': None, 'match': []}

    assignment = munkres.Munkres().compute(cost_matrix)
    # Drop dummy edges; i indexes fact and j indexes detection.
    match = [(i, j) for i, j in assignment if cost_matrix[i][j] <= window]

    tp = len(match)
    fp = n_detection - tp
    fn = n_fact - tp
    if tp > 0:
        dis = sum([cost_matrix[i][j] for i, j in match]) / float(tp)
    else:
        dis = None

    # Both inputs are non-empty here, so the denominators are positive.
    summary = {
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'precision': float(tp) / (tp + fp),
        'recall': float(tp) / (tp + fn),
        'dis': dis,
    }
    if return_match:
        summary['match'] = match
    return summary
def min_cost(costs):
    """Return average min-cost of assignment of row and column of the `costs` matrix.

    The optimal assignment is found with the Hungarian algorithm; the summed
    cost of the assigned cells is divided by the number of rows in `costs`.
    """
    import munkres

    # Solve on a copy: Munkres duplicates rows with `row[:]`, which is only
    # a view for numpy rows, so its in-place reductions would otherwise
    # alter `costs` before the assigned costs are read back below.
    working = [list(row) for row in costs]
    assignment = munkres.Munkres().compute(working)
    total = sum(costs[r][c] for r, c in assignment)
    return total / len(costs)
def cost_function(cost_matrix, verbose=True):
    """Return the total cost of the optimal assignment for `cost_matrix`.

    Args:
        cost_matrix: 2-D numpy array of costs (it is indexed with
            (row, col) tuples).
        verbose: when true, print the number of points and the final cost.

    Returns:
        Sum of `cost_matrix` over the cells chosen by the Hungarian
        algorithm.

    A progress marker is written to stdout for every assigned pair
    regardless of `verbose`: '.' when row == col, '!' otherwise.
    """
    # Only the modules actually used are imported; the former unused
    # imports (memotrack, scipy, matplotlib, time) made the function fail
    # wherever those packages were missing.
    import sys

    import munkres
    import numpy as np

    work_matrix = np.copy(cost_matrix)
    if verbose:
        print('Using ' + str(np.shape(work_matrix)[0]) + ' points for cost calculation.')
        sys.stdout.flush()

    # Calculate cost. Munkres gets a plain list-of-lists copy: it pads and
    # reduces rows in place, which misbehaves on ndarray rows.
    m = munkres.Munkres()
    coords = m.compute(work_matrix.tolist())

    cost = 0  # Set initial cost of zero
    for coord in coords:
        # Test changes: mark whether the point kept its own index.
        # sys.stdout.write keeps the markers on one line; the old
        # `print(...),` idiom only suppressed the newline on Python 2.
        sys.stdout.write('.' if coord[0] == coord[1] else '!')
        sys.stdout.flush()
        cost += cost_matrix[coord]  # Get the cost for each assigned coordinate
    if coords:
        sys.stdout.write('\n')

    if verbose:
        print('Final cost of ' + str(cost))
        sys.stdout.flush()

    return cost
def hungarian_matching(h, z):
    """Hungarian matching algorithm. O(n^3)

    Pairs the rows of `h` with the rows of `z` so that the mean squared
    difference between paired rows is minimal, and returns both tensors
    gathered in matched order.

    Alternative approaches.
    - could compile binary and add op
        https://github.com/tensorflow/tensorflow/pull/3780
    - could just wrap a python op. will be slower. (but easier)
    """
    solver = munkres.Munkres()

    def get_pairings(C):
        """
        An implementation using munkres python library and py_wrap.

        Args:
            C (np.ndArray): the costs of different pairings

        Returns:
            np.ndarray: int64 array of shape (2, n); row 0 holds the indices
            into `h`, row 1 the matched indices into `z`
        """
        # TODO. this is WAY TOO slow. but maybe its just the hungarian algol?
        # Munkres expects a list of lists; ndarray rows are only views and
        # would be padded/reduced in place.
        assignments = solver.compute(np.asarray(C).tolist())
        # zip() is lazy on Python 3, so materialise it before building the
        # array; the dtype must match the tf.int64 declared to py_func.
        return np.array(list(zip(*assignments)), dtype=np.int64)

    def cost_fn(x, y):
        with tf.name_scope('matching_cost'):
            return tf.reduce_mean(
                (tf.expand_dims(x, 1) - tf.expand_dims(y, 0))**2, axis=2)

    with tf.name_scope('matching'):
        C = cost_fn(h, z)
        idx = tf.py_func(get_pairings, [C], tf.int64)
        idx = tf.reshape(idx, [2, tf.shape(h)[0]])
        return tf.gather(h, idx[0]), tf.gather(z, idx[1])
def match():
    """Match candidates to roles and render the result page.

    Candidate and role ids come from the session; when either key is
    missing, a random sample (up to 50 candidates and 75 roles) is drawn
    from the database instead. Every role/candidate pair is scored via
    ``Match``, and the Hungarian algorithm picks the one-to-one pairing
    with the highest summed ``total``.

    Returns:
        The rendered ``matching/match.html`` template with the aggregate
        score, the chosen matches and the median score formatted as a
        percentage.
    """
    try:
        candidate_ids = session['candidates']
        role_ids = session['roles']
    except KeyError:
        all_candidate_ids = [c.id for c in Candidate.query.all()]
        all_role_ids = [r.id for r in Role.query.all()]
        requested_data = 50
        # Clamp the sample sizes: random.sample raises ValueError when asked
        # for more items than the population holds.
        candidate_ids = random.sample(
            all_candidate_ids, min(requested_data, len(all_candidate_ids)))
        role_ids = random.sample(
            all_role_ids, min(int(requested_data * 1.5), len(all_role_ids)))

    # NOTE(review): `ag` is never read in this view; the construction is
    # kept in case Algorithm() has side effects -- confirm.
    ag = Algorithm(weighted_dict={'location': 5, 'skills': 10, 'private office': 10, 'organisation': 5},
                   candidate_ids=candidate_ids, role_ids=role_ids)

    candidates = Candidate.query.filter(Candidate.id.in_(candidate_ids)).all()
    roles = Role.query.filter(Role.id.in_(role_ids)).all()

    matches = [Match(r, c) for r in roles for c in candidates]
    sorted_matches = sorted(matches, key=lambda pairing: pairing.role_id)

    # After sorting by role, each role owns len(candidates) consecutive
    # matches, so that is the row length (one row per role, one column per
    # candidate). Chunking by len(roles) mixed several roles into one row
    # whenever the two counts differed.
    row_length = len(candidates)
    table_of_objects = [sorted_matches[i:i + row_length]
                        for i in range(0, len(sorted_matches), row_length)]
    # Munkres minimises, so invert the totals into costs.
    table_of_totals = [[sys.maxsize - pairing.total for pairing in row]
                       for row in table_of_objects]

    solver = munkres.Munkres()
    best_match_indices = solver.compute(table_of_totals)
    best_matches = [table_of_objects[row][column]
                    for row, column in best_match_indices]

    totals = [pairing.total for pairing in best_matches]
    aggregate = sum(totals)
    median_average = "{:.1%}".format(statistics.median(totals)/50)
    return render_template('matching/match.html', aggregate=aggregate, matches=best_matches,
                           average=median_average)
def hybrid_jaccard_similarity(set1, set2, threshold=0.5, function=jaro_winkler_similarity, parameters=None):
    """Compute a Jaccard-style similarity with soft element matching.

    Every element of `set1` is scored against every element of `set2` with
    `function`; scores below `threshold` count as 0. The Hungarian algorithm
    selects the one-to-one pairing with the highest summed score, and the
    result is that sum divided by
    ``len(set1) + len(set2) - number_of_pairs``.

    Args:
        set1 (set): first set.
        set2 (set): second set.
        threshold (float): minimum score for a pair to contribute.
        function (callable): similarity function called as
            ``function(s1, s2, **parameters)``.
        parameters (dict): extra keyword arguments for `function`;
            ``None`` means no extra arguments.

    Returns:
        float: the similarity; 1.0 when both sets are empty, 0.0 when
        exactly one of them is empty.
    """
    utils.check_for_none(set1, set2)
    utils.check_for_type(set, set1, set2)

    if parameters is None:
        parameters = {}

    # Munkres cannot be run on an empty matrix (an empty set1 used to raise
    # inside the solver), so settle the degenerate cases up front.
    if not set1 or not set2:
        return 1.0 if not set1 and not set2 else 0.0

    matching_score = []
    for s1 in set1:
        inner = []
        for s2 in set2:
            score = function(s1, s2, **parameters)
            if score < threshold:
                score = 0.0
            inner.append(1.0 - score)  # munkres finds out the smallest element
        matching_score.append(inner)

    indexes = munkres.Munkres().compute(matching_score)

    # go back to similarity
    score_sum = 0.0
    for r, c in indexes:
        score_sum += 1.0 - matching_score[r][c]
    matching_count = len(indexes)

    # Both sets are non-empty here and at most min(len) pairs exist, so the
    # denominator is always positive.
    return float(score_sum) / float(len(set1) + len(set2) - matching_count)
def find_person_id_associations(boxes, pts, prev_boxes, prev_pts, prev_person_ids, next_person_id=0,
                                pose_alpha=0.5, similarity_threshold=0.5, smoothing_alpha=0.):
    """
    Find associations between previous and current skeletons and apply temporal smoothing.
    It requires previous and current bounding boxes, skeletons, and previous person_ids.

    Args:
        boxes (:class:`np.ndarray`): current person bounding boxes
        pts (:class:`np.ndarray`): current human joints
        prev_boxes (:class:`np.ndarray`): previous person bounding boxes
        prev_pts (:class:`np.ndarray`): previous human joints
        prev_person_ids (:class:`np.ndarray`): previous person ids
        next_person_id (int): the id that will be assigned to the next novel detected person
            Default: 0
        pose_alpha (float): parameter to weight between bounding box similarity and pose (oks) similarity.
            pose_alpha * pose_similarity + (1 - pose_alpha) * bbox_similarity
            Default: 0.5
        similarity_threshold (float): lower similarity threshold to have a correct match between previous and
            current detections.
            Default: 0.5
        smoothing_alpha (float): linear temporal smoothing filter. Set 0 to disable, 1 to keep the previous detection.
            Default: 0.

    Returns:
            (:class:`np.ndarray`, :class:`np.ndarray`, :class:`np.ndarray`):
                A tuple (boxes, pts, person_ids) where boxes and pts are temporally smoothed.
                `boxes` and `pts` are modified in place when smoothing is enabled.
    """
    bbox_similarity_matrix, pose_similarity_matrix = compute_similarity_matrices(
        boxes, prev_boxes, pts, prev_pts)
    similarity_matrix = pose_similarity_matrix * pose_alpha + bbox_similarity_matrix * (1 - pose_alpha)

    if similarity_matrix.size > 0:
        # Munkres require a cost => 1 - similarity
        solver = munkres.Munkres()
        assignments = np.asarray(solver.compute((1 - similarity_matrix).tolist()))
    else:
        # No current or no previous detections: nothing to associate, and
        # Munkres cannot be run on an empty matrix.
        assignments = []

    # -1 marks detections that did not inherit an id from the previous frame.
    person_ids = np.ones(len(pts), dtype=np.int32) * -1
    for assignment in assignments:
        current, previous = assignment[0], assignment[1]
        if similarity_matrix[current, previous] > similarity_threshold:
            person_ids[current] = prev_person_ids[previous]
            if smoothing_alpha:
                boxes[current] = (1 - smoothing_alpha) * boxes[current] \
                    + smoothing_alpha * prev_boxes[previous]
                pts[current] = (1 - smoothing_alpha) * pts[current] \
                    + smoothing_alpha * prev_pts[previous]

    # Unmatched detections receive fresh consecutive ids.
    person_ids[person_ids == -1] = np.arange(
        next_person_id, next_person_id + np.sum(person_ids == -1))

    return boxes, pts, person_ids
def perform_matching(reviewer_dict, conference):
    """Assign submitted papers to reviewers by maximal research overlap.

    Each reviewer gets ``conference.num_papers_per_reviewer`` slots (rows of
    the cost matrix); each submitted paper is one column. The cost of a cell
    is the negated overlap score, so the Hungarian algorithm maximises the
    total overlap while giving every paper to at most one slot.

    Args:
        reviewer_dict: mapping reviewer id -> reviewer; reviewers expose
            ``get_paper_research_overlap_score(paper)``.
        conference: object with ``num_papers_per_reviewer`` (int) and
            ``submitted_papers`` (mapping paper id -> paper).

    Returns:
        dict: reviewer id -> list of assigned paper ids (possibly empty).
    """
    num_papers_per_reviewer = conference.num_papers_per_reviewer
    papers = list(conference.submitted_papers.items())

    index_to_paper_id = {x: paper_id for x, (paper_id, _) in enumerate(papers)}
    index_to_reviewer_id = dict()
    reviewer_id_to_assigned_paper_ids = dict()

    # The matrix must be 2-D (reviewer slots x papers); it used to be
    # allocated 1-D, which made every `cost_matrix[y, x]` assignment fail.
    cost_matrix = np.zeros(
        (num_papers_per_reviewer * len(reviewer_dict), len(papers)))

    for i, (reviewer_id, reviewer) in enumerate(reviewer_dict.items()):
        reviewer_id_to_assigned_paper_ids[reviewer_id] = []
        index_to_reviewer_id[i] = reviewer_id
        first_slot = i * num_papers_per_reviewer
        for x, (_, paper) in enumerate(papers):
            # The score does not depend on the slot, so compute it once and
            # copy it into all of this reviewer's slots.
            cost = -reviewer.get_paper_research_overlap_score(paper)
            cost_matrix[first_slot:first_slot + num_papers_per_reviewer, x] = cost

    # Without reviewers, slots or papers there is nothing to assign, and
    # Munkres cannot be run on an empty matrix.
    if cost_matrix.size == 0:
        return reviewer_id_to_assigned_paper_ids

    # Munkres pads non-square input, which only works on plain lists.
    indices = munkres.Munkres().compute(cost_matrix.tolist())

    for reviewer_slot, paper_slot in indices:
        index = reviewer_slot // num_papers_per_reviewer
        reviewer_id = index_to_reviewer_id[index]
        paper_id = index_to_paper_id[paper_slot]
        reviewer_id_to_assigned_paper_ids[reviewer_id].append(paper_id)

    return reviewer_id_to_assigned_paper_ids
def maximal_match_helper(self, edges, pred_list, gold_list, row_bool):
    """Return the maximum-weight matching between predictions and gold items.

    `edges[i][j]` is the weight between `pred_list[i]` and `gold_list[j]`.
    The weights are embedded in a square cost matrix (padded with a large
    constant) and solved with the Hungarian algorithm.

    Returns a list of ``(weight, pred, gold)`` tuples sorted in descending
    order, where a side that fell on padding is reported as ``''``. The
    list is cut to the number of rows of `edges` when `row_bool` is true,
    otherwise to its number of columns.
    """
    n_rows = len(edges)
    n_cols = len(edges[0])
    size = max(n_rows, n_cols)

    top = max([max(weights) for weights in edges])
    filler = (top + 1) * (top + 1)

    # Square matrix, initially all padding; real cells hold filler - weight.
    cost = [[filler] * size for _ in range(size)]
    for r in range(n_rows):
        for c in range(n_cols):
            cost[r][c] = filler - edges[r][c]

    solver = munkres.Munkres()
    pairs = []
    for row, col in solver.compute(cost):
        real_row = row < n_rows
        real_col = col < n_cols
        if not (real_row or real_col):
            # padding matched with padding carries no information
            continue
        pairs.append((filler - cost[row][col],
                      pred_list[row] if real_row else '',
                      gold_list[col] if real_col else ''))

    pairs.sort(reverse=True)
    limit = n_rows if row_bool else n_cols
    return pairs[:limit]
def calc_order(d, rerank=False):
    """Score each matrix in `d` by the cheapest cells of its optimal assignment.

    For every matrix, the leading square block (side = its smaller
    dimension) is transposed and solved with the Hungarian algorithm. The
    `t` smallest assigned costs are summed, where `t` is the smallest
    dimension found over all matrices in `d` (capped at 10000).

    Args:
        d: sequence of 2-D numpy arrays.
        rerank: when true, nothing is computed and the scores stay zero.

    Returns:
        np.ndarray: one score per matrix in `d`.

    Prints the assignment size and the score of every processed matrix.
    """
    ret = np.zeros(len(d))

    # Number of assigned costs summed per matrix.
    t = 10000
    for dist in d:
        t = min(t, dist.shape[0], dist.shape[1])

    for i, dist in enumerate(d):
        if rerank:
            continue
        tp = min(dist.shape[0], dist.shape[1])
        # First tp rows, transposed, then the first tp rows again: the
        # transposed leading tp x tp block.
        mat = dist[:tp, :tp].T
        # Give Munkres a list-of-lists copy so `mat` is never altered.
        match = munkres.Munkres().compute(mat.tolist())
        g = sorted(mat[r][c] for r, c in match)
        for p in range(t):
            ret[i] += g[p]
        print(len(match), ret[i])
    return ret
def cost_to_assignment(self, cost, max_cost):
    """Assign tracks to detections and reject pairs that are too costly.

    Args:
        cost: n x m cost matrix (num of tracks x num of detections).
        max_cost: scalar; an assigned pair whose cost is >= max_cost is
            rejected.

    Returns:
        tuple: (assignments, unassignedTracks, unassignedDetection)
            assignments - list of (track, detection) index pairs that were
                kept
            unassignedTracks - int array of shape (k, 1) with the tracks of
                the rejected pairs
            unassignedDetection - int array of shape (k, 1) with the
                detections of the rejected pairs

    NOTE(review): tracks/detections that Munkres leaves out entirely (when
    `cost` is not square) are not reported as unassigned -- confirm that
    callers handle them.
    """
    # Solve on a copy: Munkres duplicates rows with `row[:]`, which is only
    # a view for numpy rows, so its in-place reductions would otherwise
    # change `cost` before the threshold test below reads it.
    working = [list(row) for row in cost]
    solved = munkres.Munkres().compute(working)

    assignments = []
    rejected_tracks = []
    rejected_detections = []
    for track, detection in solved:
        if cost[track][detection] >= max_cost:
            rejected_tracks.append(track)
            rejected_detections.append(detection)
        else:
            assignments.append((track, detection))

    # Column vectors, as produced before by stacking onto an empty (0, 1) array.
    unassignedTracks = np.array(rejected_tracks, dtype=int).reshape(-1, 1)
    unassignedDetection = np.array(rejected_detections, dtype=int).reshape(-1, 1)
    return assignments, unassignedTracks, unassignedDetection
def _assign_samples(tcga_metadataset):
    """Assign every sample to exactly one (task, cancer) pair.

    For each cancer the processed table is read, samples already assigned
    for an earlier cancer are dropped, and the rows are shuffled. A sample
    may only be assigned to a column (task) for which it has a non-NaN
    value. The Hungarian algorithm distributes the samples over repeated
    copies of the task columns.

    Args:
        tcga_metadataset: object providing ``get_processed_filename(cancer)``.

    Returns:
        dict: (task column name, cancer) -> list of sample index labels.
    """
    import pandas as pd
    import munkres

    blacklist = []
    sample_to_task_assignment = {}
    for cancer in get_cancers():
        filename = tcga_metadataset.get_processed_filename(cancer)
        dataframe = pd.read_csv(filename, sep='\t', index_col=0, header=0)
        dataframe = dataframe.drop(blacklist, errors='ignore')
        permutation = dataframe.index[torch.randperm(len(dataframe.index))]
        dataframe = dataframe.reindex(permutation)

        # 1. where a label exists, DISALLOWED where it is missing.
        labels = dataframe.notna()
        labels = labels.applymap(lambda x: 1. if x else munkres.DISALLOWED)
        # Samples without any label cannot be assigned at all.
        all_disallowed = labels.apply(lambda row: all(row == munkres.DISALLOWED), axis=1)
        labels = labels.drop(labels[all_disallowed].index)

        matrix = labels.values
        shape = matrix.shape

        # The +5 allows for some slack in the assignment
        # which is necessary for the used implementation to converge on BRCA
        # (np.int was removed in NumPy 1.24; the builtin int is equivalent.)
        repeats = int(np.ceil(shape[0] / shape[1])) + 5
        expanded_matrix = np.tile(matrix, (1, repeats))

        indices = munkres.Munkres().compute(expanded_matrix.tolist())
        # Fold the repeated columns back onto the original task columns.
        mapped_indices = [(a, b % shape[1]) for a, b in indices]

        for index, mapped_index in mapped_indices:
            # Rows of `matrix` correspond to `labels`, from which the
            # all-disallowed samples were removed; looking the position up
            # in `dataframe.index` would name the wrong sample whenever a
            # row had been dropped.
            sample_to_task_assignment.setdefault((labels.columns[mapped_index], cancer), []).append(
                labels.index[index])

        blacklist.extend(dataframe.index.tolist())

    return sample_to_task_assignment
def hungurian(self, cost_matrix):
    """Solve the assignment problem for `cost_matrix`.

    Returns a tuple ``(total, index)``: the summed cost of the optimal
    assignment and the list of (row, column) pairs that realise it.
    """
    solver = munkres.Munkres()
    assignment = solver.compute(cost_matrix.tolist())

    accumulated = 0
    for r, c in assignment:
        accumulated += cost_matrix[r][c]
    return accumulated, assignment
def test_solve():
    """
    Try out how the munkres library is used.
    """
    costs = [
        [39.4, 78.5, 81.0],
        [50.2, 68.0, 46.1],
    ]
    solver = munkres.Munkres()
    print(solver.compute(costs))
def approx_xterm_colors(hex_rgbs):
    """Calculate the approximate unique xterm color for every input color.

    Yields, per assigned input color, the assigned column of the difference
    matrix shifted by 16 (presumably to skip the 16 base xterm colors --
    verify against `rgb_xterm_diff_matrix`).
    """
    diff_matrix = rgb_xterm_diff_matrix(hex_rgbs)
    solver = munkres.Munkres()
    assignment = solver.compute(diff_matrix)
    for pair in assignment:
        yield pair[1] + 16
def optimalPairing(x, y):
    """Return, for every row of the distance matrix of `x` and `y`, the
    column assigned to it by the minimum-cost assignment.

    Asserts that the solver reports the rows in order 0, 1, 2, ...
    """
    solver = munkres.Munkres()
    assignment = solver.compute(distanceMatrix(x, y))

    pairing = []
    for position, (row, col) in enumerate(assignment):
        assert position == row
        pairing.append(col)
    return pairing
def findBestPermutationListMunkres( X1, X2, atomlist = None ):
    """
    For a given set of positions X1 and X2, find the best permutation of the
    atoms in X2.

    Use an implementation of the Hungarian Algorithm in the Python package
    index (PyPi) called munkres (another name for the algorithm).  The
    hungarian algorithm time scales as O(n^3), much faster than the O(n!) from
    looping through all permutations.

    http://en.wikipedia.org/wiki/Hungarian_algorithm
    http://pypi.python.org/pypi/munkres/1.0.5.2

    another package, hungarian, implements the same routine in compiled C
    http://pypi.python.org/pypi/hungarian/
    When I first downloaded this package I got segfaults.  The problem for me
    was casting an integer pointer as (npy_intp *).  I may add the corrected
    version to pygmin at some point

    Parameters: X1 and X2 are flat coordinate arrays (three values per
    atom); atomlist lists the atoms that may be permuted and defaults to all
    of them.

    Returns (dist, X1, X2new): the square root of the summed assignment
    cost, X1 flattened, and the permuted copy of X2 flattened.
    """
    # Integer division: on Python 3 `/` yields a float and range() rejects it.
    nsites = len(X1) // 3

    if atomlist is None:
        atomlist = range(nsites)

    #########################################
    # create the cost matrix
    # cost[j,i] = (X1(i,:) - X2(j,:))**2
    #########################################
    X1 = X1.reshape([-1,3])
    X2 = X2.reshape([-1,3])
    cost = makeCostMatrix(X1, X2, atomlist)

    #########################################
    # run the munkres algorithm
    #########################################
    matrix = cost.tolist()
    m = munkres.Munkres()
    newind = m.compute(matrix)

    #########################################
    # apply the permutation
    #########################################
    costnew = 0.
    X2new = np.copy(X2)
    for (iold, inew) in newind:
        costnew += cost[iold, inew]
        if iold != inew:
            atomiold = atomlist[iold]
            atominew = atomlist[inew]
            X2new[atominew,:] = X2[atomiold,:]

    dist = np.sqrt(costnew)
    return dist, X1.reshape(-1), X2new.reshape(-1)
def best_alignment(true_seq, seq, K):
    '''
    Relabel `seq` so that its classes line up as well as possible with the
    classes of `true_seq`.

    The class-to-class assignment maximises the entries of the confusion
    matrix over the K labels; the relabelled sequence is returned.
    '''
    labels = np.arange(K)
    cm = confusion_matrix(seq, true_seq, labels)
    # Munkres minimises, so negate the counts to maximise the overlap.
    assignment = munkres.Munkres().compute(-cm)
    _, columns = zip(*assignment)
    relabel = np.asarray(columns)
    return relabel[seq]
def _hungurian_method(self, C):
    """Run the Hungarian algorithm on the cost matrix `C`.

    Returns ``(total, indexes)``: the summed cost of the selected cells and
    the (row, col) pairs selected by the solver.
    """
    pairs = munkres.Munkres().compute(C.tolist())
    cost_sum = 0
    for r, c in pairs:
        cost_sum = cost_sum + C[r][c]
    return cost_sum, pairs
def _exec(self):
    """Solve the stored cost matrix and record the resulting allocations.

    Row indices select people, column indices select sessions. A
    `SessionCopy` is replaced by its parent session before it is appended
    to the person's allocations.
    """
    solver = munkres.Munkres()
    for row, col in solver.compute(self._matrix):
        person = self._people[row]
        session = self._sessions[col]
        allocations = person.get_allocs()
        allocations.append(
            session.get_parent() if isinstance(session, SessionCopy) else session)
def hungarian(cost_matrix):
    # type: (np.ndarray) -> List[(int, int)]
    """
    Solve the classical assignment problem between two sets of elements
    (A) and (B) with the Hungarian algorithm.

    :param cost_matrix: cost matrix such that `cost_matrix [i, j]`
        represents the association cost of the i-th element in (A)
        with the j-th element in (B)
    :return: indexes of the lowest-cost pairings between rows and columns
        of `cost_matrix`: an array of (row, col) pairs, or an empty list
        when `cost_matrix` has no elements
    """
    if cost_matrix.size <= 0:
        return []

    solver = munkres.Munkres()
    if cost_matrix.shape[0] > cost_matrix.shape[1]:
        # More rows than columns: solve the transposed problem and swap the
        # pair columns back to (row, col) order.
        flipped = cost_matrix.transpose((1, 0))
        return np.array(solver.compute(flipped.tolist()))[:, ::-1]
    return np.array(solver.compute(cost_matrix.tolist()))
def solve():
    """Find the assignment with the maximal total value in `get_matrix()`.

    The values are turned into costs by subtracting them from the matrix
    maximum, the Hungarian algorithm is run on those costs, and every
    selected cell is printed as ``(row, column) => value``.

    Returns the sum of the selected values.
    """
    matrix = get_matrix()
    cost_matrix = np.max(matrix) - matrix

    hungarian_solver = munkres.Munkres()
    # Munkres pads and reduces rows in place, which misbehaves on ndarray
    # rows; give it a list-of-lists copy instead.
    indices = hungarian_solver.compute(cost_matrix.tolist())

    answer = 0
    for row, column in indices:
        answer += matrix[row, column]
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and 3 (the old print statement fails on Python 3).
        print("({}, {}) => {}".format(row, column, matrix[row, column]))
    return answer
def bipartite_ordering(home_coordinates, commute_coordinates, commute_distances):
    """Pair every home with a commute target of matching distance.

    The cost of pairing home i with target j is the absolute difference
    between their Euclidean distance and ``commute_distances[i]``; the
    Hungarian algorithm minimises the summed cost.

    Args:
        home_coordinates: array whose columns 0 and 1 hold x and y.
        commute_coordinates: array whose columns 0 and 1 hold x and y.
        commute_distances: 1-D array with one desired distance per home.

    Returns:
        list: the commute-target index assigned to each paired home, in the
        order reported by the solver.
    """
    import munkres

    dx = commute_coordinates[:, 0][np.newaxis, :] - home_coordinates[:, 0][:, np.newaxis]
    dy = commute_coordinates[:, 1][np.newaxis, :] - home_coordinates[:, 1][:, np.newaxis]
    distances = np.sqrt(dx**2 + dy**2)
    costs = np.abs(distances - commute_distances[:, np.newaxis])

    # Munkres pads non-square input by extending each row with a list,
    # which only works on plain lists, not on ndarray rows.
    assignment = munkres.Munkres().compute(costs.tolist())
    return [target for _, target in assignment]
def matching_diagrams(p1, p2, plot=False, method='munkres', use_diag=True, new_fig=True, subplot=(111)):
    '''Returns a list of matching components and the summed matching distance.

    Possible matching methods:
    - munkres
    - marriage problem ('marriage')

    With use_diag, each diagram is extended by the `symmetric` image of the
    other diagram's points before matching. With plot, the matching is
    drawn.

    Returns (indices, ssum): the matched index pairs into the (possibly
    extended) diagrams and the sum of their distances.

    Raises ValueError for an unknown method.
    '''
    from scipy.spatial.distance import cdist
    import munkres
    from tmd.view import common as _cm

    def plot_matching(p1, p2, indices, new_fig=True, subplot=(111)):
        '''Plots matching between p1, p2
        for the corresponding indices
        '''
        import pylab as plt
        fig, ax = _cm.get_figure(new_fig=new_fig, subplot=subplot)
        for i, j in indices:
            ax.plot((p1[i][0], p2[j][0]), (p1[i][1], p2[j][1]), color='black')
            ax.scatter(p1[i][0], p1[i][1], c='r')
            ax.scatter(p2[j][0], p2[j][1], c='b')

    # Fail early and clearly; an unknown method used to leave `indices`
    # unbound and surface later as a NameError.
    if method not in ('munkres', 'marriage'):
        raise ValueError("Unknown matching method: %r" % (method,))

    if use_diag:
        p1_enh = p1 + [symmetric(i) for i in p2]
        p2_enh = p2 + [symmetric(i) for i in p1]
    else:
        p1_enh = p1
        p2_enh = p2

    D = cdist(p1_enh, p2_enh)

    if method == 'munkres':
        m = munkres.Munkres()
        # A list-of-lists copy keeps D untouched and lets Munkres pad
        # non-square input (possible when use_diag is off).
        indices = m.compute(D.tolist())
    else:
        first_pref = [np.argsort(k).tolist() for k in D]
        second_pref = [np.argsort(k).tolist() for k in cdist(p2_enh, p1_enh)]
        indices = marriage_problem(first_pref, second_pref)

    if plot:
        plot_matching(p1_enh, p2_enh, indices, new_fig=new_fig, subplot=subplot)

    ssum = np.sum([D[i][j] for (i, j) in indices])

    return indices, ssum
def permutation_dist(X1, X2):
    """Return the distance between `X1` and `X2` under the best pairing.

    The cost of pairing row j of `X2` with row i of `X1` is their squared
    distance; the result is the square root of the minimal summed cost.
    """
    differences = X1[np.newaxis, :] - X2[:, np.newaxis, :]
    cost = (differences ** 2).sum(2)

    solver = munkres.Munkres()
    pairing = solver.compute(cost.tolist())

    squared_total = 0.
    for row, col in pairing:
        squared_total += cost[row, col]
    return np.sqrt(squared_total)
def __hungurian_method(self, C):
    """Run the Hungarian algorithm on the cost matrix `C` and time it.

    Prints the elapsed wall-clock time of the computation.

    Returns ``(total, indexes)``: the summed cost of the selected cells and
    the (row, column) pairs selected by the solver.
    """
    t = time.time()
    m = munkres.Munkres()
    indexes = m.compute(C.tolist())
    total = 0
    for row, column in indexes:
        value = C[row][column]
        total += value
    # print() with one pre-built string works on Python 2 and 3; the print
    # statement used before is a syntax error on Python 3.
    print('PROFILE HUNGURIAN ALGORITHM: ' + str(time.time() - t))
    return total, indexes
def lowest_cost(self,orders,couriers):
    """
    Given a list of orders and couriers, return a list of index tuples
    that specify which order is assigned to which courier.

    Returns an empty list when there are no orders or no couriers.
    """
    nothing_to_assign = self.is_empty(orders) or self.is_empty(couriers)
    if nothing_to_assign:
        return []

    cost_matrix = self.form_matrix(orders, couriers)
    solver = munkres.Munkres()
    return solver.compute(cost_matrix)
def generate_transformed_matrix(self):
    """Return the transposed confusion matrix with its columns reordered.

    The columns of ``self.mat.T`` are permuted according to the assignment
    that maximises the summed entries selected by the Hungarian algorithm:
    column i of the result is the column assigned to the i-th pair.

    Returns:
        np.ndarray: float array with the shape of ``self.mat.T``.
    """
    confusion = self.mat.T

    # Munkres minimises, so turn the entries into costs by subtracting them
    # from the largest entry. The previous constant,
    # sys.long_info.sizeof_digit, does not exist on Python 3 (and was only
    # the byte size of an int digit, not a maximum).
    ceiling = confusion.max()
    cost_matrix = munkres.make_cost_matrix(
        confusion, lambda value: ceiling - value)

    m = munkres.Munkres()
    indexes = m.compute(cost_matrix)

    new_mat = np.zeros(confusion.shape)
    for i, (_, column) in enumerate(indexes):
        new_mat[:, i] = confusion[:, column]
    return new_mat