def calc_hungarian_alignment_score(claim, headline):
    """Calculate the alignment score between the claim and the headline using
    the implementation of the Hungarian alignment algorithm provided in
    https://pypi.python.org/pypi/munkres/."""
    claim_tokens = nltk.word_tokenize(claim)
    headline_tokens = nltk.word_tokenize(headline)

    df = pd.DataFrame(index=claim_tokens, columns=headline_tokens, data=0.)

    for c in claim_tokens:
        for a in headline_tokens:
            df.loc[c, a] = compute_paraphrase_score(c, a)

    matrix = df.values
    cost_matrix = make_cost_matrix(matrix, lambda cost: _max_ppdb_score - cost)
    indices = _munk.compute(cost_matrix)

    total = 0.0
    for row, column in indices:
        value = matrix[row][column]
        total += value
    # Divide the total by the size of the smaller dimension (the number of
    # tokens in the shorter of claim/headline) to normalize, since the
    # algorithm always returns that many index pairs.
    return total / float(np.min(matrix.shape))
def getHungarianIndices(self, referenceChainsCount, mobileChainsCount, matches):
    """
    Run the Hungarian algorithm from the Munkres module to determine the
    optimal matches of chains.

    Args:
        referenceChainsCount: number of chains in reference
        mobileChainsCount: number of chains in mobile
        matches: the chain matches as determined by prody

    Returns:
        optimal matches based on the Hungarian algorithm, in the form of
        indices, and the corresponding matchesMatrix for those indices
    """
    profitStack = [None] * (referenceChainsCount * mobileChainsCount)
    matchesStack = [None] * (referenceChainsCount * mobileChainsCount)
    for element in range(len(matches)):
        profitStack[element] = self.hungarianProfit(matches[element][2],
                                                    matches[element][3])
        matchesStack[element] = matches[element]

    profitMatrix = np.zeros((referenceChainsCount, mobileChainsCount))
    matchesMatrix = np.zeros((referenceChainsCount, mobileChainsCount), dtype=object)
    for row in range(referenceChainsCount):
        for column in range(mobileChainsCount):
            profitMatrix[row][column] = profitStack.pop(0)
            matchesMatrix[row][column] = matchesStack.pop(0)

    profitMatrix = profitMatrix.tolist()
    cost_matrix = make_cost_matrix(profitMatrix, lambda cost: 1000.0 - cost)
    m = Munkres()
    indices = m.compute(cost_matrix)
    return indices, matchesMatrix
def d2dAllocate(lambda_matrix, maximum_rb_allowed):
    # If multiple resource blocks are allowed for D2D users, repeat each row
    # once per allowed resource block.
    if maximum_rb_allowed != 1:
        lambda_matrix_new = []
        for i in range(len(lambda_matrix)):
            for j in range(maximum_rb_allowed):
                lambda_matrix_new.append(lambda_matrix[i])
        lambda_matrix = lambda_matrix_new

    # Convert the profit matrix to a cost matrix.
    cost_matrix = make_cost_matrix(lambda_matrix, lambda cost: sys.maxsize - cost)
    m = Munkres()
    # indexes contains the 2-D indices of the maximum-weight allocations.
    indexes = m.compute(cost_matrix)

    allocated_d2d_in_channels = np.zeros(len(lambda_matrix[0])) - 1
    d2d_and_indexes = []  # indexes to return
    for row, column in indexes:
        allocated_d2d_in_channels[column] = int(row / maximum_rb_allowed)
    allocated_d2d_in_channels = allocated_d2d_in_channels.astype(int)
    for i in range(len(allocated_d2d_in_channels)):
        if allocated_d2d_in_channels[i] != -1:
            d2d_and_indexes.append([
                allocated_d2d_in_channels[i],
                [allocated_d2d_in_channels[i], i]
            ])
    return d2d_and_indexes
def computePerfectMatching(self):
    # transform dict to array
    inputMatrix = self.transformDictToArray()
    # compute perfect matching
    cost_matrix = make_cost_matrix(inputMatrix, lambda cost: sys.maxsize - cost)
    m = Munkres()
    perfect_indexes = m.compute(cost_matrix)
    # produce the final result:
    # find if rowIndex is in table1 or table2
    # if self.table1[0]['head']['row']:
    #     # index a is in table1
    #     for (a, b) in perfect_indexes:
    #         # find elements with indexes a, b in the left and right tables
    #         # self.toBeJoined.append(())
    # else:
    #     # index a is in table2
    #     for (a, b) in perfect_indexes:
    #         ...
    return perfect_indexes
def _find_optimal_match(self):
    matrix = [[0 for _ in range(len(self._orders))]
              for _ in range(len(self._couriers))]
    viewed_rows = {i: 0 for i in range(len(self._couriers))}
    for i, courier in enumerate(self._couriers):
        for j, order in enumerate(self._orders):
            revenue_from_completion = self.revenue_from_completing_order(courier, order)
            if revenue_from_completion == float('-inf'):
                matrix[i][j] = DISALLOWED
            else:
                matrix[i][j] = revenue_from_completion

    elements_to_remove = [x for x in viewed_rows
                          if viewed_rows[x] == len(self._orders)]
    matrix = np.delete(matrix, elements_to_remove, axis=0)
    if elements_to_remove:
        self._couriers = [x for idx, x in enumerate(self._couriers)
                          if idx not in elements_to_remove]
    if matrix.size == 0:
        return None

    cost_matrix = make_cost_matrix(
        matrix,
        lambda cost: (sys.maxsize - cost) if cost != DISALLOWED else DISALLOWED)
    indexes = self.m.compute(cost_matrix)
    if not indexes:
        return None
    return indexes
def munkres_score(gt, pred):
    """
    :param gt: a list of lists, each containing ints
    :param pred: a list of lists, each containing ints
    :return: accuracy
    """
    # Combine all the sequences into one long sequence for both gt and pred
    gt_combined = np.concatenate(gt)
    pred_combined = np.concatenate(pred)

    # Make sure we're comparing the right shapes
    assert gt_combined.shape == pred_combined.shape

    # Build out the contingency matrix.
    # This follows the methodology suggested by Zhou, De la Torre & Hodgins, PAMI 2013.
    mat = contingency_matrix(gt_combined, pred_combined)

    # Make the cost matrix, using the fact that no entry can exceed the total
    # length of the sequence.
    cost_mat = make_cost_matrix(mat, lambda x: gt_combined.shape[0] - x)

    # Apply the Munkres method (also called the Hungarian method) to find the
    # optimal cluster correspondence.
    m = Munkres()
    indexes = m.compute(cost_mat)

    # Pull out the associated 'costs', i.e. the cluster overlaps for the
    # correspondences we've found.
    cluster_overlaps = mat[tuple(zip(*indexes))]

    # Now compute the accuracy
    accuracy = np.sum(cluster_overlaps) / float(np.sum(mat))
    return accuracy
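# A minimal usage sketch for munkres_score, with made-up toy labels. It
# assumes contingency_matrix comes from scikit-learn and that numpy and the
# munkres names are imported as in the function above.
if __name__ == '__main__':
    from sklearn.metrics.cluster import contingency_matrix
    gt = [[0, 0, 1, 1], [2, 2]]
    pred = [[1, 1, 0, 0], [2, 2]]
    # pred equals gt up to a relabeling of the clusters, so the score is 1.0
    print(munkres_score(gt, pred))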
def calc_hungarian_alignment_score(self, s, t):
    """Calculate the alignment score between the two texts s and t using the
    implementation of the Hungarian alignment algorithm provided in
    https://pypi.python.org/pypi/munkres/."""
    s_toks = get_tokenized_lemmas(s)
    t_toks = get_tokenized_lemmas(t)

    df = pd.DataFrame(index=s_toks, columns=t_toks, data=0.)

    for c in s_toks:
        for a in t_toks:
            df.loc[c, a] = self.compute_paraphrase_score(c, a)

    matrix = df.values
    cost_matrix = make_cost_matrix(matrix, lambda cost: _max_ppdb_score - cost)
    indexes = _munk.compute(cost_matrix)

    total = 0.0
    for row, column in indexes:
        value = matrix[row][column]
        total += value

    # The original procedure returned (indexes, score); the indexes are not
    # useful as a feature, so only the normalized score is returned.
    return total / float(np.min(matrix.shape))
def score(self, seq_gt, seq_pred):
    seq_gt = self.prep_seq(seq_gt)
    seq_pred = self.prep_seq(seq_pred)
    m, n = len(seq_gt), len(seq_pred)  # lengths of the two sequences
    if m == 0:
        return 1.
    if n == 0:
        return 0.
    similarities = torch.zeros((m, n))
    for i in range(m):
        for j in range(n):
            a = torch.from_numpy(self.vectors[seq_gt[i]])
            b = torch.from_numpy(self.vectors[seq_pred[j]])
            similarities[i, j] = torch.mean(
                F.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0))).unsqueeze(-1)
    # Rescale cosine similarity from [-1, 1] to [0, 1].
    similarities = (similarities + 1) / 2
    similarities = similarities.numpy()
    # With no inversion function, make_cost_matrix (munkres >= 1.1.0)
    # defaults to sys.maxsize - value, turning similarities into costs.
    ass = self.munkres.compute(munkres.make_cost_matrix(similarities))
    intersection_score = .0
    for a in ass:
        intersection_score += similarities[a]
    iou_score = intersection_score / (m + n - intersection_score)
    return iou_score
def similar(list_current, list_called):
    global P, total
    # Compare the shorter file against the longer one.
    if len(list_current) < len(list_called):
        # compare both files line by line for similarity
        similarity = comparison(list_current, list_called)
        # store the length of the smaller file
        P = len(list_current)
        point = [[0 for x in range(len(list_called))]
                 for y in range(len(list_current))]
    else:
        similarity = comparison(list_called, list_current)
        P = len(list_called)
        point = [[0 for x in range(len(list_current))]
                 for y in range(len(list_called))]

    # Use munkres to form a maximum-weight bipartite matching.
    graph_matrix = make_cost_matrix(similarity, lambda cost: 1.0 - cost)
    m = Munkres()
    indexes = m.compute(graph_matrix)

    total = 0
    for row, column in indexes:
        # record the pairs of lines with positive similarity between the files
        value = similarity[row][column]
        if value > 0.0:
            total += 1
            point[row][column] = 1
    return point
def make_cost_matrix(profit_matrix, inversion_function):
    """
    **DEPRECATED**

    Please use the module function ``make_cost_matrix()``.
    """
    import munkres
    return munkres.make_cost_matrix(profit_matrix, inversion_function)
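# Design note: this method only forwards to the module-level
# munkres.make_cost_matrix(). Keeping it as a thin deprecated shim preserves
# backward compatibility for callers that still invoke it through a Munkres
# instance.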
def maximum_weight_bipartite(matrix):
    # Invert profits into costs; this assumes every entry is below 100000.
    cost_matrix = make_cost_matrix(matrix, lambda cost: 100000 - cost)
    m = Munkres()
    indices = m.compute(cost_matrix)
    return indices
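# A minimal usage sketch for maximum_weight_bipartite; the weights below are
# made up. Maximizing total weight is done by minimizing (100000 - weight).
if __name__ == '__main__':
    from munkres import Munkres, make_cost_matrix
    weights = [[5, 9, 1],
               [10, 3, 2],
               [8, 7, 4]]
    pairs = maximum_weight_bipartite(weights)
    best = sum(weights[r][c] for r, c in pairs)
    print(pairs, best)  # [(0, 1), (1, 0), (2, 2)] 23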
def maxi():
    # matrix = [[20, 16, 22, 18],
    #           [25, 28, 15, 21],
    #           [27, 20, 23, 26],
    #           [24, 22, 23, 22]]
    f = open('newds.txt')
    n = int(f.readline())
    matrix = []
    for i in range(n):
        matrix.append(list(map(int, f.readline().split())))

    start_time = time.perf_counter()
    # With no inversion function, make_cost_matrix (munkres >= 1.1.0)
    # defaults to sys.maxsize - value to turn profits into costs.
    cost_matrix = make_cost_matrix(matrix)
    m = Munkres()
    indexes = m.compute(cost_matrix)
    timex = time.perf_counter() - start_time

    print_matrix(matrix, msg='Highest profits through this matrix:')
    total = 0
    for row, column in indexes:
        value = matrix[row][column]
        total += value
        print('(%d, %d) -> %d' % (row, column, value))
    print('total profit=%d' % total)
    print('Time =', timex, 'seconds')

    with open('times2.txt', 'a+') as timetext:
        timetext.write(str(timex))


main()
def filter_rects(all_rects, threshold, net_config, input_rects=[], max_threshold=1.0):
    """Takes in all_rects and, based on the threshold, carries out the
    stitching process described in the paper."""
    accepted_rects = input_rects
    for i in range(0, net_config["grid_height"], 1):
        for j in range(0, net_config["grid_width"], 1):
            relevant_rects = []
            current_rects = [r for r in all_rects[i][j] if r.confidence > threshold]
            for other in accepted_rects:
                for current in current_rects:
                    if other.overlaps(current):
                        relevant_rects.append(other)
                        break
            if len(relevant_rects) == 0 or len(current_rects) == 0:
                accepted_rects += current_rects
                continue

            # Build the cost matrix directly: start from a large base cost,
            # reward overlap, and add a small distance penalty.
            matrix = []
            for c in current_rects:
                row = []
                for a in relevant_rects:
                    row.append(1000)
                    if a.overlaps(c):
                        row[-1] -= 100
                    row[-1] += a.distance(c) / 1000.0
                matrix.append(row)

            m = Munkres()
            # The matrix is already a cost matrix, so the identity inversion
            # just copies it before solving.
            cost_matrix = make_cost_matrix(matrix, lambda x: x)
            indices = m.compute(cost_matrix)

            bad = set()
            for row, column in indices:
                c = current_rects[row]
                a = relevant_rects[column]
                if c.confidence > max_threshold:
                    bad.add(row)
                    continue
                if c.overlaps(a):
                    if c.confidence > a.confidence and c.iou(a) > 0.7:
                        c.true_confidence = a.confidence
                        accepted_rects.remove(a)
                    else:
                        bad.add(row)
            for k in range(len(current_rects)):
                if k not in bad:
                    accepted_rects.append(current_rects[k])
    return accepted_rects
def KM_mapping(action, VEHICLES, request_selected, vehicle, current_time):
    '''
    :param action: a_prob, the action probability weights
    :return: the final matching ("indexes") and its total profit
    '''
    print('cal_profit begin')
    profit_matrix = cal_profit(VEHICLES, request_selected, vehicle, current_time)
    print('cal_profit end')

    action = action.reshape([1, VEHICLES_NUMS * REQUEST_NUMS])
    action = np.apply_along_axis(lambda x: round(x[0], 2), 0, action)
    action_weights = action.reshape([VEHICLES_NUMS, REQUEST_NUMS])
    km_matrix = profit_matrix * action_weights

    # Invert weighted profits into costs; assumes entries stay below 400.
    # km_weights = make_cost_matrix(km_matrix, lambda item: (maxsize - item) if item != 0 else DISALLOWED)
    km_weights = make_cost_matrix(km_matrix, lambda item: 400 - item)
    m = Munkres()
    print('km begin')
    indexes = m.compute(km_weights)
    print('km_end')
    print_matrix(profit_matrix, msg='Highest profit through this matrix:')

    total = 0
    temp_indexes = copy.deepcopy(indexes)
    for row, column in temp_indexes:
        value = profit_matrix[row][column]
        if value == 0:
            # drop zero-profit pairings from the final matching
            indexes.remove((row, column))
        total += value
    return indexes, total
def KM_mapping(action, REQUESTS, VEHICLES, request_selected, vehicle, current_time):
    '''
    :param action: a_prob, the action probability weights
    :return: the final matching ("indexes") and its total profit
    '''
    profit_matrix = cal_profit(REQUESTS, VEHICLES, request_selected, vehicle,
                               current_time)
    km_matrix = profit_matrix * action.reshape([VEHICLES_NUMS, REQUEST_NUMS])
    # Zero-profit pairs are marked DISALLOWED so they can never be chosen.
    km_weights = make_cost_matrix(
        km_matrix,
        lambda item: (maxsize - item) if item != 0 else DISALLOWED)
    m = Munkres()
    indexes = m.compute(km_weights)
    print_matrix(profit_matrix, msg='Highest profit through this matrix:')

    total = 0
    for row, column in indexes:
        value = profit_matrix[row][column]
        total += value
    return indexes, total
def get_suitability_score(customer_list, product_list):
    '''
    Calculate the total maximum suitability score using the munkres
    algorithm, and return the detailed customer/product entries together
    with the total suitability score.
    '''
    suitability_scores = []
    customer_suitability_scores = []
    for customer in customer_list:
        for product in product_list:
            customer_suitability_scores.append(
                SuitabilityScore.calculate_suitability_score(customer, product))
        suitability_scores.append(customer_suitability_scores)
        customer_suitability_scores = []

    customer_product_entries = []
    cost_matrix = make_cost_matrix(suitability_scores, lambda cost: 1e10 - cost)
    munkres = Munkres()
    indexes = munkres.compute(cost_matrix)

    total_suitability_score = 0
    for customer_index, product_index in indexes:
        suitability_score = suitability_scores[customer_index][product_index]
        total_suitability_score += suitability_score
        suitability_score_entry = SuitabilityScoreEntry(
            customer_list[customer_index],
            product_list[product_index],
            suitability_score)
        customer_product_entries.append(suitability_score_entry)
    return customer_product_entries, total_suitability_score
def ngramset_edit_distance(set1, set2):
    def get_yxgraph_distance(x, y):
        import math
        if x == y:
            return 0
        elif x > y:
            return math.sqrt(math.pow((x - y), 2))
        else:
            return -math.sqrt(math.pow((y - x), 2))

    matrix = _ngram_matrix(set1, set2)
    cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
    m = Munkres()
    indexes = m.compute(cost_matrix)

    max_matrix = []
    xygraph_distance_list = []
    for row, column in indexes:
        value = matrix[row][column]
        max_matrix.append(value)
        xygraph_distance_list.append(get_yxgraph_distance(row, column))

    edit_distance = numpy.mean(max_matrix) / 100
    variance = numpy.var(max_matrix)
    # if edit_distance > 0.7:
    #     sim2 = _similarity(xygraph_distance_list, 2)
    #     sim3 = _similarity(xygraph_distance_list, 3)
    #     return edit_distance, variance, sim2, sim3, xygraph_distance_list
    return edit_distance, variance, None, None, xygraph_distance_list
def perform_alignment(ref_instances, sys_instances, kernel, maximize=True):
    disallowed = {}
    max_sim = 0
    sim_matrix, component_matrix = [], []
    for s_i, s in enumerate(sys_instances):
        sim_row = []
        comp_row = []
        for r_i, r in enumerate(ref_instances):
            sim, comp = kernel(r, s)
            sim_row.append(sim)
            comp_row.append(comp)
            if sim == DISALLOWED:
                disallowed[(s_i, r_i)] = True
            else:
                if sim > max_sim:
                    max_sim = sim
        sim_matrix.append(sim_row)
        component_matrix.append(comp_row)

    if maximize:
        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else (max_sim + 1) - sim
    else:
        def _mapper(sim):
            return max_sim + 1 if sim == DISALLOWED else sim

    matrix = make_cost_matrix(sim_matrix, _mapper)

    correct_detects, false_alarms, missed_detects = [], [], []
    unmapped_sys = set(range(0, len(sys_instances)))
    unmapped_ref = set(range(0, len(ref_instances)))
    if len(matrix) > 0:
        for s_i, r_i in Munkres().compute(matrix):
            if disallowed.get((s_i, r_i), False):
                continue
            unmapped_sys.remove(s_i)
            unmapped_ref.remove(r_i)
            correct_detects.append(AlignmentRecord(ref_instances[r_i],
                                                   sys_instances[s_i],
                                                   sim_matrix[s_i][r_i],
                                                   component_matrix[s_i][r_i]))

    for r_i in unmapped_ref:
        missed_detects.append(AlignmentRecord(ref_instances[r_i], None, None, None))
    for s_i in unmapped_sys:
        false_alarms.append(AlignmentRecord(None, sys_instances[s_i], None, None))

    return (correct_detects, missed_detects, false_alarms)
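# Design note on perform_alignment: rather than passing munkres.DISALLOWED
# through to the solver, disallowed pairs are mapped to max_sim + 1, a cost
# strictly worse than any real pair, and then filtered out of the computed
# assignment via the `disallowed` lookup. This keeps the matrix fully
# numeric, so the solve succeeds even when a row has no allowed pairing.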
def test_profit():
    profit_matrix = [[94, 66, 100, 18, 48],
                     [51, 63, 97, 79, 11],
                     [37, 53, 57, 78, 28],
                     [59, 43, 97, 88, 48],
                     [52, 19, 89, 60, 60]]
    import sys
    cost_matrix = munkres.make_cost_matrix(profit_matrix,
                                           lambda cost: sys.maxsize - cost)
    indices = m.compute(cost_matrix)
    profit = sum([profit_matrix[row][column] for row, column in indices])
    assert_equals(profit, 392)
def max_match(matrix):
    # Assumes all entries are at most 100.0, so (100.0 - cost) is a valid
    # profit-to-cost inversion.
    cost_matrix = make_cost_matrix(matrix, lambda cost: 100.0 - cost)
    m = Munkres()
    indexes = m.compute(cost_matrix)
    # print_matrix(matrix, msg='Lowest cost through this matrix:')
    total = 0
    for row, column in indexes:
        value = matrix[row][column]
        total += value
    return total
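# Note on the inversion constant: the examples in this collection variously
# use 100.0, 1000, 1e10, or sys.maxsize as the ceiling. Any ceiling at least
# as large as the biggest profit works, since the subtraction only has to
# reverse the ordering of the values; sys.maxsize is the conventional safe
# default when the range of the profits is unknown.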
def filter_rects(all_rects, threshold, input_rects=[], max_threshold=1.0, config=None):
    """Takes in all_rects and, based on the threshold, carries out the
    stitching process described in the paper."""
    accepted_rects = input_rects
    for i in range(0, config["grid_height"], 1):
        for j in range(0, config["grid_width"], 1):
            relevant_rects = []
            current_rects = [r for r in all_rects[i][j] if r.confidence > threshold]
            for other in accepted_rects:
                for current in current_rects:
                    if other.overlaps(current):
                        relevant_rects.append(other)
                        break
            if len(relevant_rects) == 0 or len(current_rects) == 0:
                accepted_rects += current_rects
                continue

            matrix = []
            for c in current_rects:
                row = []
                for a in relevant_rects:
                    row.append(1000)
                    if a.overlaps(c):
                        row[-1] -= 100
                    row[-1] += a.distance(c) / 1000.0
                matrix.append(row)

            m = Munkres()
            # The matrix is already a cost matrix, so the identity inversion
            # just copies it before solving.
            cost_matrix = make_cost_matrix(matrix, lambda x: x)
            indices = m.compute(cost_matrix)

            bad = set()
            for row, column in indices:
                c = current_rects[row]
                a = relevant_rects[column]
                if c.confidence > max_threshold:
                    bad.add(row)
                    continue
                if c.overlaps(a):
                    if c.confidence > a.confidence and c.iou(a) > 0.7:
                        c.true_confidence = a.confidence
                        accepted_rects.remove(a)
                    else:
                        bad.add(row)
            for k in range(len(current_rects)):
                if k not in bad:
                    accepted_rects.append(current_rects[k])
    return accepted_rects
def compute_matches(affinity_scores, j1_pts, j2_pts):
    matching_results = []
    match_confidence_threshold = CONFIG.match_confidence_threshold
    j1_count, j2_count = affinity_scores.shape
    indices = MUNKRES_INSTANCE.compute(
        make_cost_matrix(affinity_scores.tolist(),
                         inversion_function=lambda x: 2 - x))
    for row, col in indices:
        if affinity_scores[row, col] > match_confidence_threshold:
            matching_results.append(
                (j1_pts[row], j2_pts[col], affinity_scores[row, col]))
    return matching_results
def generate_transformed_matrix(self):
    confusion = self.mat
    confusion = confusion.T
    # Invert counts into costs; sys.maxsize safely exceeds any count.
    cost_matrix = munkres.make_cost_matrix(confusion,
                                           lambda cost: sys.maxsize - cost)
    m = munkres.Munkres()
    indexes = m.compute(cost_matrix)
    new_mat = np.zeros(confusion.shape)
    for i in range(len(indexes)):
        new_mat[:, i] = confusion[:, indexes[i][1]]
    return new_mat
def return_munkres_result(matrix):
    cost_matrix = make_cost_matrix(
        matrix,
        lambda cost: (sys.maxsize - cost) if cost != DISALLOWED else DISALLOWED)
    m = Munkres()
    indexes = m.compute(cost_matrix)
    bestMatches = []
    for row, column in indexes:
        value = matrix[row][column]
        bestMatches.append(value)
    return bestMatches
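# A small sketch (made-up numbers) of the DISALLOWED pattern used above:
# forbidden pairings stay DISALLOWED through the inversion, so the solver
# can never pick them.
if __name__ == '__main__':
    import sys
    from munkres import DISALLOWED, Munkres, make_cost_matrix
    profits = [[10, DISALLOWED, 4],
               [6, 8, DISALLOWED],
               [DISALLOWED, 5, 7]]
    print(return_munkres_result(profits))  # [10, 8, 7]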
def show_csv_file(request, csv_id):
    data = CsvFile.objects.filter(id=csv_id).first().text.strip()
    mentors = set()
    mentees = set()
    lines = data.splitlines()
    for line in lines[1:]:
        if len(line) <= 1:
            continue
        mentor, mentee, utility = [x.strip() for x in line.split(',')]
        mentors.add(mentor)
        mentees.add(mentee)

    matrix = [[DISALLOWED] * len(mentees) for x in mentors]
    mentors = {x[1]: x[0] for x in enumerate(sorted(mentors))}
    mentees = {x[1]: x[0] for x in enumerate(sorted(mentees))}
    for line in lines[1:]:
        if len(line) <= 1:
            continue
        mentor, mentee, utility = [x.strip() for x in line.split(',')]
        utility = float(utility)
        matrix[mentors[mentor]][mentees[mentee]] = utility

    # Utilities are assumed to be below 200; unlisted pairs stay DISALLOWED.
    cost_matrix = make_cost_matrix(
        matrix,
        lambda cost: (200 - cost) if cost != DISALLOWED else DISALLOWED)
    m = Munkres()
    indexes = m.compute(cost_matrix)

    mentors = {v: k for k, v in mentors.items()}
    mentees = {v: k for k, v in mentees.items()}
    used_mentors = set()
    used_mentees = set()
    table = []
    for row, column in indexes:
        value = matrix[row][column]
        mentor = mentors[row]
        mentee = mentees[column]
        table.append([mentor, mentee, value])
        used_mentors.add(mentor)
        used_mentees.add(mentee)

    mentors = set(mentors.values())
    mentees = set(mentees.values())
    unused_mentees = mentees - used_mentees
    unused_mentors = mentors - used_mentors
    table = sorted(table, key=lambda x: x[0])
    context = {
        'table': table,
        'unused_mentors': unused_mentors,
        'unused_mentees': unused_mentees
    }
    return render(request, 'show_table.html', context=context)
def compute_agreements(similarity):
    import munkres
    import numpy as np
    m = munkres.Munkres()
    print("Computing mapping...")
    # cost = 1 - similarity, so 1 - cost recovers the original similarity
    cost = munkres.make_cost_matrix(similarity, lambda s: 1 - s)
    indexes = m.compute(cost)
    agreement = np.sum([1 - cost[r][c] for r, c in indexes]) / len(similarity)
    print("Agreement:", agreement)
    return agreement
def test_profit_float():
    profit_matrix = [[94.01, 66.02, 100.03, 18.04, 48.05],
                     [51.06, 63.07, 97.08, 79.09, 11.1],
                     [37.11, 53.12, 57.13, 78.14, 28.15],
                     [59.16, 43.17, 97.18, 88.19, 48.2],
                     [52.21, 19.22, 89.23, 60.24, 60.25]]
    import sys
    cost_matrix = munkres.make_cost_matrix(profit_matrix,
                                           lambda cost: sys.maxsize - cost)
    indices = m.compute(cost_matrix)
    profit = sum([profit_matrix[row][column] for row, column in indices])
    assert profit == pytest.approx(362.65)
def calculateSimilarity(sent1, sent2):
    # Case correction
    sent1 = sent1.lower()
    sent2 = sent2.lower()

    # Tokenization
    tokens1 = word_tokenize(sent1)
    tokens2 = word_tokenize(sent2)

    # Remove punctuation
    tokens1 = [x for x in tokens1 if x not in string.punctuation]
    tokens2 = [x for x in tokens2 if x not in string.punctuation]

    # Remove stopwords
    # stopWords = set(stopwords.words('english'))
    # tokens1 = [x for x in tokens1 if x not in stopWords]
    # tokens2 = [x for x in tokens2 if x not in stopWords]

    # Lemmatization
    # lemmatizer = WordNetLemmatizer()
    # tokens1 = [lemmatizer.lemmatize(x) for x in tokens1]
    # tokens2 = [lemmatizer.lemmatize(x) for x in tokens2]

    # Keep only tokens the model knows
    tokens1 = [x for x in tokens1 if x in model.vocab]
    tokens2 = [x for x in tokens2 if x in model.vocab]

    if len(tokens1) > 0 and len(tokens2) > 0:
        m = Munkres()
        pairMatrix = []
        for t1 in tokens1:
            tmpList = []
            for t2 in tokens2:
                tmpList.append(100 * JSD(model[t1], model[t2]))
            pairMatrix.append(tmpList)
        cost_matrix = make_cost_matrix(pairMatrix, lambda cost: 100 - cost)
        indexes = m.compute(cost_matrix)
        total = 0
        for row, column in indexes:
            value = pairMatrix[row][column]
            total += value
        return total / len(indexes) / 100
        # return 2 * total / (len(tokens1) + len(tokens2)) / 100
    else:
        return 0
def KM_mapping(REQUESTS, VEHICLES, request_selected, vehicle, current_time):
    profit_matrix = cal_profit(REQUESTS, VEHICLES, request_selected, vehicle,
                               current_time)
    km_weights = make_cost_matrix(
        profit_matrix,
        lambda item: (maxsize - item) if item != 0 else DISALLOWED)
    m = Munkres()
    indexes = m.compute(km_weights)
    total = 0
    for row, column in indexes:
        value = profit_matrix[row][column]
        total += value
    return indexes, total
def calculateIdealComposition(lineup, composition):
    m = Munkres()
    matrix = buildMatrix(lineup, composition)
    # Scores appear to be fractions in [0, 1]; scale them to integers and
    # invert them into costs.
    cost_matrix = munkres.make_cost_matrix(matrix,
                                           lambda cost: int(100 - (cost * 100)))
    indexes = m.compute(cost_matrix)
    picks = []
    for index in indexes:
        picks.append((lineup[index[0]], composition[index[1]]))
    return picks
def cluster_assignment(id2label_1, id2label_2):
    """
    Assigns cluster names from the first clustering to cluster names of the
    second clustering using the Hungarian algorithm for the assignment
    problem (maximizing the similarity between clusters from the two
    clusterings). The label sets may have different lengths and contain
    different types of objects.

    Arguments:
        id2label_1: dict of id to label from the first clustering
        id2label_2: dict of id to label from the second clustering
    Returns:
        dict: how to transform first labels to second
    """
    # Get the intersection of the two dicts
    ids = []
    labels_1, labels_2 = [], []
    for news_id, label_1 in id2label_1.items():
        if news_id in id2label_2:
            ids.append(news_id)
            labels_1.append(label_1)
            labels_2.append(id2label_2[news_id])

    n_clusters_1 = len(set(labels_1))
    n_clusters_2 = len(set(labels_2))

    # Encode labels so they run from 0 to the maximum
    l_encoder_1 = LabelEncoder().fit(labels_1)
    labels_1_transf = l_encoder_1.transform(labels_1)
    l_encoder_2 = LabelEncoder().fit(labels_2)
    labels_2_transf = l_encoder_2.transform(labels_2)

    # Create the matrix of co-occurrence counts between the two clusterings
    matrix = [[0] * n_clusters_2 for i in range(n_clusters_1)]
    for i in range(min(len(labels_1), len(labels_2))):
        matrix[labels_1_transf[i]][labels_2_transf[i]] += 1

    # Run the Munkres (Hungarian) algorithm
    cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
    m = Munkres()
    indices = m.compute(cost_matrix)

    # Transform labels back (inverse_transform expects array-likes)
    transform = {}
    for (label_1, label_2) in indices:
        transform[l_encoder_1.inverse_transform([label_1])[0]] = \
            l_encoder_2.inverse_transform([label_2])[0]
    return transform
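# A toy run of cluster_assignment with made-up ids and labels, assuming
# sklearn's LabelEncoder and the munkres imports used above are in scope.
if __name__ == '__main__':
    id2label_a = {1: 'x', 2: 'x', 3: 'y', 4: 'y'}
    id2label_b = {1: 'p', 2: 'p', 3: 'q', 4: 'q'}
    # maps cluster 'x' to 'p' and 'y' to 'q'
    print(cluster_assignment(id2label_a, id2label_b))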
def _find_best_permutation(self, spectral, spatial, idx_constant):
    '''Finds the best permutation of classes in the spectral and spatial models.

    Args:
        spectral (np.array): conditional log-likelihood of the spectral model
            (as in Eq. 19 in [1]), shape (N,T,F)
        spatial (np.array): conditional log-likelihood of the spatial model
            (as in Eq. 19 in [1]), shape (N,T,F)
        idx_constant (tuple or int): indices of the axes over which the
            permutation is constant

    Examples:
        idx_constant = (1,2) -> the permutation is constant over all time
            frames and frequency bins (one global permutation)
        idx_constant = 1 -> the permutation is constant over all time frames
            (one permutation per frequency)

    Returns:
        permutations (dict): maps tuples of time and frequency indices to the
            best permutation. For constant indices, the map contains only
            index 0.

    Examples:
        permutations = {(0,0): [2, 0, 1]} -> one global permutation
            (idx_constant = (1,2))
            -> spectral comp. 0 corresponds to spatial comp. 2
            -> spectral comp. 1 corresponds to spatial comp. 0
            -> spectral comp. 2 corresponds to spatial comp. 1

    [1] Integration of variational autoencoder and spatial clustering for
        adaptive multi-channel neural speech separation;
        K. Zmolikova, M. Delcroix, L. Burget, T. Nakatani, J. Cernocky
    '''
    if isinstance(idx_constant, int):
        idx_constant = (idx_constant,)
    idx_constant = tuple([i + 1 for i in idx_constant])

    perm_scores = logsumexp(spectral[:, None, :, :] + spatial[None, :, :, :],
                            axis=idx_constant)
    perm_scores = np.expand_dims(perm_scores, idx_constant)

    permutations = {}
    for i1, i2 in np.ndindex(perm_scores.shape[-2:]):
        idx_perm = Munkres().compute(make_cost_matrix(perm_scores[:, :, i1, i2]))
        idx_perm.sort(key=lambda x: x[0])
        permutations[i1, i2] = [i[1] for i in idx_perm]
    return permutations
def matching(male_distances):
    cost_matrix = munkres.make_cost_matrix(male_distances, lambda cost: 1.0 - cost)
    m = Munkres()
    indices = m.compute(cost_matrix)
    total = 0.0
    pairings = {}
    for row, column in indices:
        value = male_distances[row][column]
        total += value
        pairings[column] = [row]
    print('total profit=%f' % total)
    return pairings
def symmetric_matching(table):
    """Cluster matching that maximizes the total number of elements in common
    while matching at most one cluster to another.

    >>> symmetric_matching([[2, 2, 1], [1, 0, 2]])
    [(0, 0), (1, 2)]
    >>> symmetric_matching([[3, 2, 1], [2, 0, 1]])
    [(0, 0), (1, 2)]
    """
    maximum = max(it.chain(*table))
    cost_matrix = make_cost_matrix(table, lambda x: maximum - x)
    indices = Munkres().compute(cost_matrix)
    return indices
def make_match(advertisers, persons, ctrfunc=funky_ctr):
    # make the CTR matrix
    ctr_matrix = make_ctr_matrix(advertisers, persons, ctrfunc)
    # convert it to a cost matrix by subtracting all values from a larger value
    cost_matrix = make_cost_matrix(ctr_matrix, lambda ctr: sys.maxsize - ctr)
    # compute the match
    match = Munkres().compute(cost_matrix)
    # Elements in match are two-element tuples, where the first is an index
    # into advertisers and the second an index into persons. Compute the total
    # CTR by looking the matched pairs up in the CTR matrix.
    total_ctr = sum(map(lambda pair: ctr_matrix[pair[0]][pair[1]], match))
    return match, total_ctr
def do_hungarian_assignment(dict_a, dict_b, cost_func, yield_condt,
                            cost_matrix_func=None):
    for ka in dict_a.keys():
        if ka in dict_b:
            ia = dict_a[ka]
            ib = dict_b[ka]
            # max-similarity calculation
            mat = [[cost_func(a, b) for b in ib] for a in ia]
            if cost_matrix_func is not None:
                c_mat = make_cost_matrix(mat, cost_matrix_func)
            else:
                c_mat = mat
            indexes = MUNKR.compute(c_mat)
            for row, col in indexes:
                # yield only if the condition is satisfied
                if yield_condt(mat[row][col]):
                    yield ia[row], ib[col]
def assignDuties(dayA, dayB):
    if len(dayA) != len(dayB):
        print("Illegal!! The number of duties should be equal every day")
        exit(1)
    matrix = createMatrix(len(dayA))
    # Fill the matrix with the similarities
    for i in range(len(dayA)):
        for j in range(len(dayA)):
            matrix[i][j] = calcSimilarity(dayA[i], dayB[j])
    # Invert so we find the maximum sum rather than the minimum.
    # Since all similarities lie in [0, 1], 2 is larger than all of them.
    cost_matrix = make_cost_matrix(matrix, lambda cost: 2 - cost)
    m = Munkres()
    # indexes will contain the assignment
    indexes = m.compute(cost_matrix)
    res = []
    for row, column in indexes:
        res.append(("Driver " + str(row + 1),
                    ("Day 1 duty: " + str(row + 1),
                     "Day 2 duty: " + str(column + 1))))
    return res
def get_best_matching(source_corpus, target_corpus, scores):
    stripper = LanguageStripper()
    err = 0
    m = munkres.Munkres()
    cost_matrix = munkres.make_cost_matrix(scores, lambda cost: 1 - cost)
    indexes = m.compute(cost_matrix)
    source_urls = list(source_corpus.keys())
    target_urls = list(target_corpus.keys())
    for row, column in indexes:
        s_url = source_urls[row]
        t_url = target_urls[column]
        success = stripper.strip(t_url) == stripper.strip(s_url)
        if not success:
            err += 1
    n = min(len(source_corpus), len(target_corpus))
    sys.stderr.write("Correct: %d out of %d = %f%%\n" %
                     (n - err, n, (1. * n - err) / n))
def hungarianAssignment(cases, controls, numberOfControlsPerCase):
    selectedControls = list()
    convFactor = 100000.00
    profitRows = list()
    for control in controls:
        row = list()
        for _ in range(numberOfControlsPerCase):
            row += [control.relatedTo.get(case, 0) for case in cases]
        profitRows.append(row)
    # Scale the [0, 1] scores to the convFactor range and invert into costs.
    matrix = munkres.make_cost_matrix(profitRows,
                                      lambda cost: convFactor - cost * convFactor)
    m = Munkres()
    indexes = m.compute(matrix)
    total = 0
    for row, column in indexes:
        value = matrix[row][column]
        total += value
        if value < convFactor:
            selectedControls.append(controls[row % len(controls)])
    print("\tassignment kic score: %f"
          % (float(len(indexes) * convFactor - total) / convFactor))
    print("\tall kic score: %f" % (kicScore(cases, selectedControls)))
    return [person.id for person in selectedControls]
def calc_hungarian_alignment_score(s, t):
    """Calculate the alignment score between the two texts s and t using the
    implementation of the Hungarian alignment algorithm provided in
    https://pypi.python.org/pypi/munkres/."""
    s_toks = get_tokenized_lemmas(s)
    t_toks = get_tokenized_lemmas(t)

    df = pd.DataFrame(index=s_toks, columns=t_toks, data=0.)

    for c in s_toks:
        for a in t_toks:
            df.loc[c, a] = compute_paraphrase_score(c, a)

    matrix = df.values
    cost_matrix = make_cost_matrix(matrix, lambda cost: _max_ppdb_score - cost)
    indexes = _munk.compute(cost_matrix)

    total = 0.0
    for row, column in indexes:
        value = matrix[row][column]
        total += value
    return indexes, total / float(np.min(matrix.shape))
def linkTemplates(self, sentence):
    """ link group size and group templates using the Hungarian matching algorithm """
    templates = sentence.templates
    qTemplateList = templates.getList(self.quantityType)
    mTemplateList = templates.getList(self.mentionType)
    nQuantities = len(qTemplateList)
    nMentions = len(mTemplateList)
    maxSize = max(nQuantities, nMentions)

    if nQuantities == 0 or nMentions == 0:
        return

    probMatrix = []
    for qIdx in range(maxSize):
        probMatrix.append([])
        for mIdx in range(maxSize):
            probMatrix[qIdx].append(0)
    for fv in templates.featureVectors:
        probMatrix[fv.valueId][fv.mentionId] = fv.prob * 1000

    costMatrix = munkres.make_cost_matrix(probMatrix, lambda cost: 1000 - cost)
    m = munkres.Munkres()
    indices = m.compute(costMatrix)
    for qIdx, mIdx in indices:
        if qIdx < nQuantities and mIdx < nMentions:
            prob = probMatrix[qIdx][mIdx]
            if prob >= 500:
                # this quantity and mention should be associated
                prob = float(prob) / 1000
                qTemplate = qTemplateList[qIdx]
                mTemplate = mTemplateList[mIdx]
                self.linkQuantityAndMention(qTemplate, mTemplate, prob)
lda = np.reshape(lda, [145 * 145])

# Add dummy labels 0-16 so every classifier output contains all 17 classes
dummy_labels = list(range(17))  # [0, 1, ..., 16]
gmm_vote = np.append(gmm, dummy_labels)
lda_vote = np.append(lda, dummy_labels)
cmeans_vote = np.append(cmeans, dummy_labels)
kmeans_vote = np.append(kmeans, dummy_labels)
isodata_vote = np.append(isodata, dummy_labels)
svm_vote = np.append(svm, dummy_labels)

# Construct label maps with K-Means as the baseline

# Label map between K-Means and LDA
matrix = contingency_matrix(kmeans_vote, lda_vote)
matrix = matrix.tolist()
cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
m = Munkres()
indexes = m.compute(cost_matrix)
total = 0
newlabelA = []
for row, column in indexes:
    value = matrix[row][column]
    total += value
    newlabelA.append(column)

# Label map between K-Means and the Gaussian Mixture Model
matrix = contingency_matrix(kmeans_vote, gmm_vote)
matrix = matrix.tolist()
cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
m = Munkres()
indexes = m.compute(cost_matrix)
matrix3 = [[7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583],
           [627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913],
           [447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743],
           [217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350],
           [960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350],
           [870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803],
           [973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326],
           [322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973],
           [445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848],
           [414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198],
           [184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390],
           [821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574],
           [34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699],
           [815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107],
           [813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805]]

costmat = make_cost_matrix(matrix3, lambda cost: sys.maxsize - cost)
m = Munkres()
indexes = m.compute(costmat)
tot = 0
for row, col in indexes:
    tot += matrix3[row][col]
print(indexes)
print(tot)
def linkTemplates(self, sentence):
    """ link group size and group templates using the Hungarian matching algorithm """
    templates = sentence.templates
    onList = templates.getList('on')
    erList = templates.getList('eventrate')
    outcomeMeasurements = templates.getOutcomeMeasurementList()
    groupList = sentence.abstract.entities.lists['group']
    outcomeList = sentence.abstract.entities.lists['outcome']

    nON = len(onList)
    nER = len(erList)
    nGroups = len(groupList)
    nOutcomes = len(outcomeList)
    nGroupOutcomePairs = nGroups * nOutcomes
    if (nON + nER) == 0 or nGroupOutcomePairs == 0:
        return  # missing key information; cannot make any associations

    goPairs = []
    for group in groupList:
        for outcome in outcomeList:
            (nMatched, nUnmatched1, nUnmatched2) = group.partialSetMatch(outcome)
            # overlap between group/outcome may be no more than ONE word,
            # and that word no more than 1/3 of the smaller mention
            if self.groupOutcomeOverlap(group, outcome) == False:
                goPairs.append((group, outcome))
            else:
                print(sentence.abstract.id, '#### skipping:',
                      group.rootMention().name, ';', outcome.rootMention().name)

    nGroupOutcomePairs = len(goPairs)
    if nGroupOutcomePairs == 0:
        return  # missing key information; cannot make any associations

    # get unmatched event rates and outcome numbers
    unmatchedON = []
    unmatchedER = []
    for om in outcomeMeasurements:
        on = om.getOutcomeNumber()
        er = om.getTextEventRate()
        if er != None and on == None:
            unmatchedER.append(er)  # unmatched event rate
        elif on != None and er == None:
            unmatchedON.append(on)  # unmatched number of outcomes

    # identify as-yet-unmatched event rates and outcome numbers that could
    # potentially match each other
    erMatches = {}
    onMatches = {}
    for on in unmatchedON:
        onMatches[on] = []
    for er in unmatchedER:
        erMatches[er] = []

    for on in unmatchedON:
        couldCalculateER = False
        if on.hasAssociatedGroupSize():
            calculatedER = on.eventRate()
            couldCalculateER = True
            for er in unmatchedER:
                if er.equivalentEventRates(calculatedER):
                    onMatches[on].append(er)
                    erMatches[er].append(on)
        else:
            for group in groupList:
                groupFV = on.getMatchFeatures(group)
                if groupFV != None and groupFV.prob > 0:
                    # it is possible to associate with this group
                    gs = group.getSize(sentenceIndex=sentence.index)
                    if gs > 0:
                        calculatedER = on.eventRate(groupSize=gs)
                        couldCalculateER = True
                        for er in unmatchedER:
                            if er.equivalentEventRates(calculatedER):
                                onMatches[on].append(er)
                                erMatches[er].append(on)
        if couldCalculateER == False:
            outcomeMeasurements.remove(on.outcomeMeasurement)

    # merge outcome numbers that pair uniquely with one event rate; outcome
    # numbers that potentially match multiple event rates are discarded
    for on in onMatches.keys():
        if len(onMatches[on]) == 1 and len(erMatches[onMatches[on][0]]) == 1:
            # this outcome number is a potential match for only one event
            # rate, and that event rate matches only this outcome number;
            # assume they belong to the same outcome measurement
            er = onMatches[on][0]
            erOM = er.outcomeMeasurement
            on.outcomeMeasurement.addEventRate(er)
            outcomeMeasurements.remove(erOM)

    # now consider all possible valid (ON, ER) pairings
    # for on in unmatchedON:
    #     for er in unmatchedER:
    #         if on.hasAssociatedGroupSize() == False or on.equivalentEventRates(er.eventRate()) == True:
    #             om = OutcomeMeasurement(on)
    #             om.addEventRate(er)
    #             outcomeMeasurements.append(om)

    nOutcomeMeasurements = len(outcomeMeasurements)
    maxSize = max(nOutcomeMeasurements, nGroupOutcomePairs)

    # initialize the probability matrix for matching outcome measurements
    # with (group, outcome) pairs
    probMatrix = []
    probMultiplier = 100000
    for omIdx in range(maxSize):
        probMatrix.append([])
        for goIdx in range(maxSize):
            if omIdx < nOutcomeMeasurements and goIdx < nGroupOutcomePairs:
                om = outcomeMeasurements[omIdx]
                (group, outcome) = goPairs[goIdx]
                er = om.getTextEventRate()
                on = om.getOutcomeNumber()

                if er != None:
                    outcomeFV = er.getMatchFeatures(outcome)
                    groupFV = er.getMatchFeatures(group)
                    if outcomeFV == None or groupFV == None:
                        # this quantity has no chance of being associated with
                        # either the group or the outcome mention; this can
                        # happen if all mentions for the entity appear in a
                        # sentence after the quantity
                        probG_ER = 0
                        probO_ER = 0
                    else:
                        probO_ER = outcomeFV.prob
                        probG_ER = groupFV.prob
                else:
                    probG_ER = 1
                    probO_ER = 1

                if on != None:
                    # This outcome measurement has an outcome number. Is it
                    # useful, i.e. can we compute an event rate for this
                    # group? If not, discard this measurement (set its
                    # probability to zero). If so, is that event rate
                    # compatible with the textual event rate? If not, discard.
                    calculatedER = -1
                    gs = group.getSize(sentenceIndex=sentence.index)
                    outcomeFV = on.getMatchFeatures(outcome)
                    groupFV = on.getMatchFeatures(group)
                    if outcomeFV == None or groupFV == None:
                        probG_ON = 0
                        probO_ON = 0
                    else:
                        probO_ON = outcomeFV.prob
                        probG_ON = groupFV.prob
                    if on.hasAssociatedGroupSize() == False:
                        # no group size is attached to the outcome number yet;
                        # does the group have a size, and is the resulting
                        # event rate compatible with the textual one?
                        if gs <= 0 and er == None:
                            # there is no way to compute an event rate for
                            # this outcome measurement; it adds no useful
                            # information, so discard it
                            probG_ON = 0
                            probO_ON = 0
                        elif gs > 0:
                            # the proposed group has a size, so we can compute
                            # an event rate for this group/outcome
                            calculatedER = on.eventRate(groupSize=gs)
                            if (er != None and er.equivalentEventRates(calculatedER) == False) \
                                    or abs(calculatedER) > 1:
                                # event rates are incompatible
                                probG_ON = 0
                                probO_ON = 0
                else:
                    probG_ON = 1
                    probO_ON = 1

                if er != None and on != None:
                    probG_OM = math.sqrt(probG_ER * probG_ON)
                    probO_OM = math.sqrt(probO_ER * probO_ON)
                elif er != None:
                    probG_OM = probG_ER
                    probO_OM = probO_ER
                else:  # on != None
                    probG_OM = probG_ON
                    probO_OM = probO_ON
                prob = round(probG_OM * probO_OM * probMultiplier)
            else:
                prob = 0
            probMatrix[omIdx].append(prob)

    costMatrix = munkres.make_cost_matrix(probMatrix,
                                          lambda cost: probMultiplier - cost)
    m = munkres.Munkres()
    indices = m.compute(costMatrix)

    # earlier versions used thresholds of (1/2)^4 = 0.0625 and 0.25
    threshold = 0.0001 * probMultiplier
    for omIdx, goIdx in indices:
        if omIdx < nOutcomeMeasurements and goIdx < nGroupOutcomePairs:
            prob = probMatrix[omIdx][goIdx]
            if prob > threshold:
                # this quantity and mention should be associated
                prob = float(prob) / probMultiplier
                om = outcomeMeasurements[omIdx]
                (group, outcome) = goPairs[goIdx]
                self.linkOutcomeMeasurementAssociations(om, group, outcome, prob)
        # record outcome measurements that were not successfully matched
        # to a (group, outcome) pair
        if omIdx < nOutcomeMeasurements and (goIdx >= nGroupOutcomePairs
                                             or probMatrix[omIdx][goIdx] <= threshold):
            om = outcomeMeasurements[omIdx]
            prob = float(probMatrix[omIdx][goIdx]) / probMultiplier
            if goIdx < nGroupOutcomePairs:
                (group, outcome) = goPairs[goIdx]
            else:
                group = None
                outcome = None
            abstract = sentence.abstract
            if abstract not in self.incompleteMatches:
                self.incompleteMatches[abstract] = []
            self.incompleteMatches[abstract].append(
                baseassociator.OutcomeMeasurementAssociation(group, outcome, om, prob))
def optimizeMatrix():
    # `matrix` is a module-level profit matrix
    cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
    m = Munkres()
    indexes = m.compute(cost_matrix)
    return indexes
def costify(similarity_matrix):
    """Transform a similarity matrix into a cost matrix."""
    return munkres.make_cost_matrix(similarity_matrix, lambda s: 1 - s)
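# A quick usage sketch for costify with toy similarities in [0, 1]:
if __name__ == '__main__':
    import munkres
    sims = [[0.9, 0.1], [0.2, 0.8]]
    pairs = munkres.Munkres().compute(costify(sims))
    print(pairs)  # [(0, 0), (1, 1)]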
    # (fragment: the preceding loop header over s_idx is not part of this excerpt)
    t_idx = np.argmax(score_matrix[s_idx])
    if targets[s_idx, t_idx] > 0:
        correct.append(s_idx)
    else:
        errors.append(s_idx)

total = len(correct) + len(errors)
print("Right: %d/%d = %f%%, Wrong: %d/%d = %f%%"
      % (len(correct), total, 100. * len(correct) / total,
         len(errors), total, 100. * len(errors) / total))
sys.exit()  # early exit: only the argmax baseline above is evaluated

print("Finding best matching")
m = munkres.Munkres()
correct, errors = [], []
cost_matrix = munkres.make_cost_matrix(score_matrix, lambda cost: 1 - cost)
indexes = m.compute(cost_matrix)
for row, column in indexes:
    if targets[row, column] > 0:
        correct.append((row, column))
    else:
        errors.append((row, column))

total = len(correct) + len(errors)
print("Right: %d/%d = %f%%, Wrong: %d/%d = %f%%"
      % (len(correct), total, 100. * len(correct) / total,
         len(errors), total, 100. * len(errors) / total))

# scores = cross_validation.cross_val_score(
#     clf, m, targets, cv=5, scoring=scoring)
# print(sum(predicted), sum(predicted - targets))
# print(metrics.classification_report(targets, predicted))
gmm_model = sklmix.GMM(n_components=3, covariance_type='full')
gmm_model.fit(iris[['PW', 'PL', 'SW']])
yhat = gmm_model.predict(iris[['PW', 'PL', 'SW']])
crosstab = pd.crosstab(iris['Type'], yhat, rownames=['true'], colnames=['predicted'])
print(crosstab)

# <headingcell level=4>

# Align the confusion matrix with a non-standard package

# <codecell>

import munkres
import sys

m = munkres.Munkres()
cost = munkres.make_cost_matrix(crosstab.values.tolist(), lambda x: sys.maxsize - x)
align = m.compute(cost)
print(align, '\n')

permute = [x[1] for x in align]
new_label = np.argsort(permute)
yhat_new = new_label[yhat]
print(pd.crosstab(iris['Type'], yhat_new, rownames=['true'], colnames=['predicted']))

# <headingcell level=4>

# Bridging the gap with Rpy2

# <codecell>

from rpy2.robjects import r
from munkres import Munkres, print_matrix, make_cost_matrix
import sys

n, m = map(int, input().split())
matrix = []
for i in range(n):
    matrix.append(list(map(int, input().split())))

cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
# use a fresh name so the column count `m` read above is not clobbered
solver = Munkres()
indexes = solver.compute(cost_matrix)
for i, j in indexes:
    print('%d,%d' % (i, j))
def cost_matrix(contingency_table):
    """The Hungarian method assumes the goal is minimum cost, while our goal
    with a contingency table is maximum cost. To deal with this, the table is
    inverted in an additive sense.
    """
    return make_cost_matrix(contingency_table, lambda cost: sys.maxsize - cost)
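# A tiny run of cost_matrix with made-up counts: the assignment on the
# inverted table maximizes the matched overlap.
if __name__ == '__main__':
    import sys
    from munkres import Munkres, make_cost_matrix
    table = [[5, 1], [2, 7]]
    print(Munkres().compute(cost_matrix(table)))  # [(0, 0), (1, 1)]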
def linkTemplatesHungarian(self, sentence):
    """ link group size and group templates using the Hungarian matching algorithm """
    templates = sentence.templates
    costValueList = templates.getList('cost_value')
    outcomeList = templates.getList('outcome')
    groupList = templates.getList('group')
    nCostValues = len(costValueList)
    nGroupOutcomePairs = len(outcomeList) * len(groupList)
    if nGroupOutcomePairs == 0 or nCostValues == 0:
        return
    maxSize = max(nCostValues, nGroupOutcomePairs)

    # build the list of group-outcome pairs
    goPairs = []
    for group in groupList:
        for outcome in outcomeList:
            goPairs.append((group, outcome))

    # initialize the probability matrix for matching cost values with
    # (group, outcome) pairs
    probMatrix = []
    probMultiplier = 100000
    for cvIdx in range(maxSize):
        probMatrix.append([])
        for goIdx in range(maxSize):
            if cvIdx < nCostValues and goIdx < nGroupOutcomePairs:
                cv = costValueList[cvIdx]
                (group, outcome) = goPairs[goIdx]
                outcomeFV = cv.getMatchFeatures(outcome)
                groupFV = cv.getMatchFeatures(group)
                groupProb = groupFV.prob
                outcomeProb = outcomeFV.prob
                prob = round(groupProb * outcomeProb * probMultiplier)
            else:
                # no association can be made: this candidate match involves
                # either a dummy cost value or a dummy (group, outcome) pair
                prob = 0
            probMatrix[cvIdx].append(prob)

    costMatrix = munkres.make_cost_matrix(probMatrix,
                                          lambda cost: probMultiplier - cost)
    m = munkres.Munkres()
    indices = m.compute(costMatrix)

    # earlier versions used thresholds of (1/2)^4 = 0.0625 and 0.25
    threshold = 0.0001 * probMultiplier
    for cvIdx, goIdx in indices:
        if cvIdx < nCostValues and goIdx < nGroupOutcomePairs:
            prob = probMatrix[cvIdx][goIdx]
            if prob > threshold:
                # this quantity and mention should be associated
                prob = float(prob) / probMultiplier
                om = costValueList[cvIdx]
                (group, outcome) = goPairs[goIdx]
                self.linkOutcomeMeasurementAssociations(om, group, outcome, prob)