def calc_hungarian_alignment_score(claim, headline):
    """Calculate the alignment score between ``claim`` and ``headline``
    using the implementation of the Hungarian alignment algorithm
    provided in https://pypi.python.org/pypi/munkres/.

    Every claim token is scored against every headline token with
    ``compute_paraphrase_score``; the Hungarian algorithm then selects the
    one-to-one token pairing with the highest total score.

    Args:
        claim: Text of the claim.
        headline: Text of the headline.

    Returns:
        The summed score of the selected token pairs divided by the number
        of tokens in the shorter text, or ``0.0`` when either text yields no
        tokens.
    """
    claim_tokens = nltk.word_tokenize(claim)
    headline_tokens = nltk.word_tokenize(headline)

    # Nothing can be aligned, and the normalization below would otherwise
    # divide by zero.
    if len(claim_tokens) == 0 or len(headline_tokens) == 0:
        return 0.0

    # One row per claim token, one column per headline token. Built directly
    # as an array: repeated tokens simply get their own (identical) row or
    # column.
    matrix = np.array(
        [[compute_paraphrase_score(c, a) for a in headline_tokens]
         for c in claim_tokens],
        dtype=float)
    cost_matrix = make_cost_matrix(matrix, lambda cost: _max_ppdb_score - cost)

    indices = _munk.compute(cost_matrix)
    total = 0.0
    for row, column in indices:
        total += matrix[row][column]

    # Divide total revenue by size of lower dimension (n) (no. of tokens in
    # shorter claim/headline) to normalize since the algorithm will always
    # return n pairs of indices.
    return total / float(np.min(matrix.shape))
Exemple #2
0
 def getHungarianIndices(self, referenceChainsCount, mobileChainsCount,
                         matches):
     """
     Pick the optimal chain pairing with the Hungarian algorithm (Munkres).

         Args:
             referenceChainsCount: number of chains in reference
             mobileChainsCount: number of chains in mobile
             matches: the chain matches; items [2] and [3] of every match
                 are handed to hungarianProfit, and the matches fill the
                 matrices row by row
         Returns:
             (indices, matchesMatrix): the (row, column) pairs chosen by
             Munkres and the matrix holding the match behind every cell
     """
     cellCount = referenceChainsCount * mobileChainsCount
     profits = [None] * cellCount
     cellMatches = [None] * cellCount
     for position, match in enumerate(matches):
         profits[position] = self.hungarianProfit(match[2], match[3])
         cellMatches[position] = match

     profitMatrix = np.zeros((referenceChainsCount, mobileChainsCount))
     matchesMatrix = np.zeros((referenceChainsCount, mobileChainsCount),
                              dtype=object)
     # Walk the flat lists with a cursor, row-major.
     position = 0
     for row in range(referenceChainsCount):
         for column in range(mobileChainsCount):
             profitMatrix[row][column] = profits[position]
             matchesMatrix[row][column] = cellMatches[position]
             position += 1

     # Munkres minimizes, so turn profits into costs first.
     cost_matrix = make_cost_matrix(profitMatrix.tolist(),
                                    lambda profit: 1000.0 - profit)
     indices = Munkres().compute(cost_matrix)
     return indices, matchesMatrix
Exemple #3
0
def d2dAllocate(lambda_matrix, maximum_rb_allowed):
    """Allocate channels to D2D users with a maximum-weight Hungarian matching.

    Args:
        lambda_matrix: profit matrix; rows are matched against columns.
        maximum_rb_allowed: number of times every row is repeated, so that
            one row can be matched with several columns.

    Returns:
        A list of ``[user, [user, channel]]`` entries, one per allocated
        column, in increasing column order. ``user`` is the row index in the
        original (un-repeated) matrix.
    """
    # NOTE(review): ``sys.maxsize - profit`` is exact for integers only; with
    # float profits the subtraction rounds to the same value for every cell.
    # Confirm the matrix holds integers.
    if maximum_rb_allowed != 1:
        # Several resource blocks per user: repeat every row.
        lambda_matrix = [
            lambda_matrix[user]
            for user in range(len(lambda_matrix))
            for _ in range(maximum_rb_allowed)
        ]

    # Convert the profit matrix to a cost matrix and solve.
    cost_matrix = make_cost_matrix(lambda_matrix,
                                   lambda profit: sys.maxsize - profit)
    assignments = Munkres().compute(cost_matrix)

    # -1 marks a channel nobody was assigned to.
    channel_owner = np.full(len(lambda_matrix[0]), -1.0)
    for row, column in assignments:
        # Map the repeated row back to its original user index.
        channel_owner[column] = row // maximum_rb_allowed
    channel_owner = channel_owner.astype(int)

    return [[owner, [owner, channel]]
            for channel, owner in enumerate(channel_owner)
            if owner != -1]
    def computePerfectMatching(self):
        """Return the Munkres assignment maximizing the total of the matrix
        produced by ``self.transformDictToArray()``.

        Returns:
            The list of (row, column) index pairs computed by Munkres.
        """
        # NOTE(review): ``sys.maxsize - weight`` is exact for integers only;
        # confirm the matrix does not hold floats.
        weights = self.transformDictToArray()

        # Munkres minimizes, so invert the weights before solving.
        inverted = make_cost_matrix(weights,
                                    lambda weight: sys.maxsize - weight)
        solver = Munkres()

        # TODO: map the index pairs back onto the elements of table1/table2
        # and fill self.toBeJoined; only the raw indexes are returned so far.
        return solver.compute(inverted)
Exemple #5
0
    def _find_optimal_match(self):
        """Match couriers to orders so that the total revenue is maximal.

        Builds a couriers x orders revenue matrix from
        ``self.revenue_from_completing_order``; a revenue of ``-inf`` marks
        the pair as DISALLOWED. Couriers for which every order is disallowed
        are dropped from the matrix and from ``self._couriers`` before
        solving, so the returned row indexes refer to the pruned
        ``self._couriers``.

        Returns:
            The list of (courier_index, order_index) pairs computed by
            ``self.m``, or ``None`` when there is nothing to match.
        """
        order_count = len(self._orders)
        matrix = [[0] * order_count for _ in range(len(self._couriers))]
        # Number of disallowed orders seen for each courier row.
        viewed_rows = {i: 0 for i in range(len(self._couriers))}

        for i, courier in enumerate(self._couriers):
            for j, order in enumerate(self._orders):
                revenue_from_completion = self.revenue_from_completing_order(
                    courier, order)
                if revenue_from_completion == float('-inf'):
                    matrix[i][j] = DISALLOWED
                    # Without this count the pruning below never triggers.
                    viewed_rows[i] += 1
                else:
                    matrix[i][j] = revenue_from_completion

        # Rows in which every order is disallowed cannot be assigned.
        elements_to_remove = [
            x for x in viewed_rows if viewed_rows[x] == order_count
        ]
        matrix = np.delete(matrix, elements_to_remove, axis=0)
        if elements_to_remove:
            removed = set(elements_to_remove)
            self._couriers = [
                x for idx, x in enumerate(self._couriers)
                if idx not in removed
            ]
        if matrix.size == 0:
            return None

        # Invert against the largest allowed revenue instead of sys.maxsize:
        # ``sys.maxsize - <float>`` rounds to the same value for every cell.
        # Every remaining row has at least one allowed cell, so max() has
        # something to look at.
        ceiling = max(value for row in matrix for value in row
                      if value is not DISALLOWED)
        cost_matrix = make_cost_matrix(
            matrix, lambda cost: (ceiling - cost)
            if (cost is not DISALLOWED) else DISALLOWED)
        indexes = self.m.compute(cost_matrix)
        if not indexes:
            return None
        return indexes
Exemple #6
0
def munkres_score(gt, pred):
    """
    Compute clustering accuracy under the best cluster correspondence.

    :param gt: a list of lists, each containing ints
    :param pred: a list of lists, each containing ints
    :return: accuracy, i.e. the overlap covered by the optimal one-to-one
        matching of ground-truth and predicted clusters divided by the sum
        of the contingency matrix
    """

    # Combine all the sequences into one long sequence for both gt and pred
    gt_combined = np.concatenate(gt)
    pred_combined = np.concatenate(pred)

    # Make sure we're comparing the right shapes
    assert(gt_combined.shape == pred_combined.shape)

    # Build out the contingency matrix
    # This follows the methodology suggested by Zhou, De la Torre & Hodgkins, PAMI 2013.
    mat = contingency_matrix(gt_combined, pred_combined)

    # We need to make the cost matrix
    # We use the fact that no entry can exceed the total length of the sequence
    cost_mat = make_cost_matrix(mat, lambda x: gt_combined.shape[0] - x)

    # Apply the Munkres method (also called the Hungarian method) to find the optimal cluster correspondence
    m = Munkres()
    indexes = m.compute(cost_mat)

    # Pull out the cluster overlaps for the correspondences we've found.
    # The index must be a *tuple* (rows, columns): NumPy treats a list of
    # sequences as a single array index, not as one index per axis.
    cluster_overlaps = mat[tuple(zip(*indexes))]

    # Now compute the accuracy
    accuracy = np.sum(cluster_overlaps)/float(np.sum(mat))

    return accuracy
Exemple #7
0
    def calc_hungarian_alignment_score(self, s, t):
        """Calculate the alignment score between the two texts s and t
        using the implementation of the Hungarian alignment algorithm
        provided in https://pypi.python.org/pypi/munkres/.

        Every token of ``s`` is scored against every token of ``t`` with
        ``self.compute_paraphrase_score``; the Hungarian algorithm then
        selects the one-to-one token pairing with the highest total score.

        Returns:
            The summed score of the selected pairs divided by the token
            count of the shorter text, or ``0.0`` when either text yields no
            tokens.
        """
        s_toks = get_tokenized_lemmas(s)
        t_toks = get_tokenized_lemmas(t)

        # Nothing can be aligned, and the normalization below would
        # otherwise divide by zero.
        if len(s_toks) == 0 or len(t_toks) == 0:
            return 0.0

        # Build the score matrix directly; ``DataFrame.ix`` (used here
        # before) no longer exists in pandas.
        matrix = np.array(
            [[self.compute_paraphrase_score(c, a) for a in t_toks]
             for c in s_toks],
            dtype=float)
        cost_matrix = make_cost_matrix(matrix, lambda cost: _max_ppdb_score - cost)

        indexes = _munk.compute(cost_matrix)
        total = 0.0
        for row, column in indexes:
            total += matrix[row][column]

        # The original procedure returned the indexes together with the
        # score; only the score is used as a feature.
        return total / float(np.min(matrix.shape))
Exemple #8
0
    def score(self, seq_gt, seq_pred):
        """Soft intersection-over-union between two sequences.

        Both sequences go through ``self.prep_seq``. Items are compared via
        the cosine similarity of their ``self.vectors`` entries (rescaled
        from [-1, 1] to [0, 1]); the best one-to-one pairing found by
        Munkres gives the soft intersection.

        Returns:
            1.0 if the prepared ground truth is empty, 0.0 if only the
            prepared prediction is empty, otherwise
            intersection / (len(gt) + len(pred) - intersection).
        """
        seq_gt = self.prep_seq(seq_gt)
        seq_pred = self.prep_seq(seq_pred)
        m, n = len(seq_gt), len(seq_pred)  # length of two sequences

        if m == 0:
            return 1.
        if n == 0:
            return 0.

        similarities = torch.zeros((m, n))
        for i in range(m):
            for j in range(n):
                gt_vector = self.vectors[seq_gt[i]]
                pred_vector = self.vectors[seq_pred[j]]
                cosine = F.cosine_similarity(
                    torch.from_numpy(gt_vector).unsqueeze(0),
                    torch.from_numpy(pred_vector).unsqueeze(0))
                similarities[i, j] = torch.mean(cosine).unsqueeze(-1)

        # Shift cosine similarity into [0, 1] and hand NumPy data to Munkres.
        similarities = ((similarities + 1) / 2).numpy()
        pairing = self.munkres.compute(
            munkres.make_cost_matrix(similarities))

        intersection_score = sum((similarities[pair] for pair in pairing), .0)
        return intersection_score / (m + n - intersection_score)
def similar(list_current,list_called):
    """Mark which lines of the shorter file pair up with lines of the longer.

    ``comparison`` is called with the shorter list first. Munkres then
    picks a maximum-weight one-to-one pairing on the resulting similarity
    matrix.

    Side effects: sets the globals ``P`` (length of the shorter list) and
    ``total`` (number of selected pairs with similarity above 0).

    Returns:
        A 0/1 matrix (rows: shorter list, columns: longer list) with a 1 for
        every selected pair whose similarity is above 0.
    """
    global P,total
    # Always compare the smaller file against the bigger one; on equal
    # lengths list_called counts as the smaller.
    if len(list_current)<len(list_called):
        shorter, longer = list_current, list_called
    else:
        shorter, longer = list_called, list_current
    similarity = comparison(shorter, longer)
    P = len(shorter)
    point = [[0] * len(longer) for _ in range(len(shorter))]

    # Maximum weighted bipartite matching via munkres.
    graph_matrix = make_cost_matrix(similarity, lambda weight: 1.0 - weight)
    pairs = Munkres().compute(graph_matrix)
    total = 0
    for row, column in pairs:
        if similarity[row][column]>0.0:
            total += 1
            point[row][column] = 1
    return point
 def make_cost_matrix(profit_matrix, inversion_function):
     """
     **DEPRECATED**
     Please use the module function ``make_cost_matrix()``.

     Forwards both arguments unchanged to ``munkres.make_cost_matrix`` and
     returns its result.
     """
     from munkres import make_cost_matrix as module_make_cost_matrix
     return module_make_cost_matrix(profit_matrix, inversion_function)
Exemple #11
0
def maximum_weight_bipartite(matrix):
    """Return the (row, column) pairs of a maximum-weight assignment.

    Weights are turned into costs as ``100000 - weight`` before Munkres,
    which minimizes, is run.
    """
    inverted = make_cost_matrix(matrix, lambda weight: 100000 - weight)
    return Munkres().compute(inverted)
Exemple #12
0
def maxi():
    """Solve the maximum-profit assignment stored in ``newds.txt``.

    The first line of the file holds the number of rows ``n``; each of the
    next ``n`` lines holds one whitespace-separated row of integers. The
    chosen cells and the total profit are printed, the time spent building
    the cost matrix and solving is printed and appended to ``times2.txt``,
    and finally ``main()`` is called.
    """
    with open('newds.txt') as f:
        n = int(f.readline())
        # Materialize every row: a bare map() object can be iterated only
        # once and cannot be indexed.
        matrix = [list(map(int, f.readline().split())) for _ in range(n)]

    # Time the actual computation. time.clock() no longer exists in current
    # Python versions; perf_counter() is its replacement.
    start_time = time.perf_counter()
    cost_matrix = make_cost_matrix(matrix)
    m = Munkres()
    indexes = m.compute(cost_matrix)
    timex = time.perf_counter() - start_time

    print_matrix(matrix, msg='Highest profits through this matrix:')
    total = 0
    for row, column in indexes:
        value = matrix[row][column]
        total += value
        print('(%d, %d) -> %d' % (row, column, value))
    print('total profit=%d' % total)
    time2 = ("Time =", timex, "seconds")
    print(time2)
    with open('times2.txt', 'a+') as timetext:
        timetext.write(str(timex))
    main()
Exemple #13
0
def filter_rects(all_rects,
                 threshold,
                 net_config,
                 input_rects=None,
                 max_threshold=1.0):
    """Takes in all_rects and based on the threshold carries out the stitching process
    as described in the paper.

    Args:
        all_rects: grid of rectangle lists, indexed ``all_rects[i][j]``.
        threshold: only rectangles with ``confidence`` above this value are
            considered.
        net_config: mapping providing ``"grid_height"`` and ``"grid_width"``.
        input_rects: optional list of already accepted rectangles. When
            given, it is extended in place and returned; when omitted, a
            fresh list is used for every call.
        max_threshold: matched rectangles with ``confidence`` above this
            value are rejected.

    Returns:
        The list of accepted rectangles.
    """
    # A fresh list per call: a mutable default would be shared, so results
    # of one call would leak into the next.
    accepted_rects = [] if input_rects is None else input_rects

    for i in range(net_config["grid_height"]):
        for j in range(net_config["grid_width"]):
            current_rects = [
                r for r in all_rects[i][j] if r.confidence > threshold
            ]
            # Accepted rectangles overlapping at least one current one.
            relevant_rects = [
                other for other in accepted_rects
                if any(other.overlaps(current) for current in current_rects)
            ]

            if not relevant_rects or not current_rects:
                accepted_rects += current_rects
                continue

            # Cost of pairing current rect c with accepted rect a: overlap
            # is rewarded, distance is the tie-breaker.
            matrix = []
            for c in current_rects:
                row = []
                for a in relevant_rects:
                    cost = 1000
                    if a.overlaps(c):
                        cost -= 100
                    cost += a.distance(c) / 1000.0
                    row.append(cost)
                matrix.append(row)

            indices = Munkres().compute(matrix)

            bad = set()
            for row, column in indices:
                c = current_rects[row]
                a = relevant_rects[column]
                if c.confidence > max_threshold:
                    bad.add(row)
                    continue
                if c.overlaps(a):
                    if c.confidence > a.confidence and c.iou(a) > 0.7:
                        # The current rect replaces the accepted one.
                        c.true_confidence = a.confidence
                        accepted_rects.remove(a)
                    else:
                        bad.add(row)

            accepted_rects.extend(
                rect for k, rect in enumerate(current_rects) if k not in bad)

    return accepted_rects
def KM_mapping(action, VEHICLES, request_selected, vehicle,
               current_time):
    '''
    Match rows to columns of the action-weighted profit matrix with Munkres.

    :param action: array with VEHICLES_NUMS * REQUEST_NUMS entries; every
        entry is rounded to two decimals and used as a weight on the profit
        matrix returned by cal_profit
    :return: (indexes, total): the selected (row, column) pairs whose
        profit is non-zero, and the sum of the profits of the selected pairs
    '''
    print('cal_profit begin')
    profit_matrix = cal_profit(VEHICLES, request_selected, vehicle,
                               current_time)
    print('cal_profit end')

    flat_action = action.reshape([1, VEHICLES_NUMS * REQUEST_NUMS])
    rounded_action = np.apply_along_axis(
        lambda column: round(column[0], 2), 0, flat_action)
    km_matrix = profit_matrix * rounded_action.reshape(
        [VEHICLES_NUMS, REQUEST_NUMS])

    # Munkres minimizes, so invert the weighted profits.
    km_weights = make_cost_matrix(km_matrix, lambda item: (400 - item))
    solver = Munkres()
    print('km begin')
    assignment = solver.compute(km_weights)
    print('km_end')
    print_matrix(profit_matrix, msg='Highers profit through this matrix:')

    total = 0
    kept = []
    for row, column in assignment:
        value = profit_matrix[row][column]
        # Pairs without profit are not reported.
        if value != 0:
            kept.append((row, column))
        total += value
    return kept, total


# KM_mapping([])
Exemple #15
0
def KM_mapping(action, REQUESTS, VEHICLES, request_selected, vehicle,
               current_time):
    '''
    Match rows to columns of the action-weighted profit matrix with Munkres.

    :param action: array that is reshaped to
        [VEHICLES_NUMS, REQUEST_NUMS] and multiplied element-wise with the
        profit matrix returned by cal_profit
    :return: (indexes, total): the (row, column) pairs selected by Munkres
        and the sum of their profits. Cells whose weighted profit is 0 are
        DISALLOWED.
    '''
    profit_matrix = cal_profit(REQUESTS, VEHICLES, request_selected, vehicle,
                               current_time)
    km_matrix = profit_matrix * action.reshape([VEHICLES_NUMS, REQUEST_NUMS])

    # Invert against the largest weighted profit rather than ``maxsize``:
    # ``maxsize - <float>`` rounds to the same value for every cell, which
    # erases the differences Munkres is supposed to optimize.
    ceiling = max((item for row in km_matrix for item in row), default=0)
    km_weights = make_cost_matrix(
        km_matrix, lambda item: (ceiling - item) if item != 0 else DISALLOWED)

    m = Munkres()
    indexes = m.compute(km_weights)
    print_matrix(profit_matrix, msg='Highers profit through this matrix:')
    total = 0
    for row, column in indexes:
        total += profit_matrix[row][column]
    return indexes, total


# KM_mapping([])
    def get_suitability_score(customer_list, product_list):
        '''
        Find the customer/product assignment with the maximum total
        suitability score using the munkres algorithm.

        Returns a tuple (entries, total): one SuitabilityScoreEntry per
        assigned customer/product pair, and the sum of their scores.
        '''
        # One row per customer, one column per product.
        suitability_scores = [
            [SuitabilityScore.calculate_suitability_score(customer, product)
             for product in product_list]
            for customer in customer_list
        ]

        # Munkres minimizes, so invert the scores.
        cost_matrix = make_cost_matrix(suitability_scores,
                                       lambda score: 1e10 - score)
        assignment = Munkres().compute(cost_matrix)

        customer_product_entries = []
        total_suitability_score = 0
        for customer_index, product_index in assignment:
            pair_score = suitability_scores[customer_index][product_index]
            total_suitability_score += pair_score
            customer_product_entries.append(
                SuitabilityScoreEntry(customer_list[customer_index],
                                      product_list[product_index],
                                      pair_score))

        return customer_product_entries,total_suitability_score
 def make_cost_matrix(profit_matrix, inversion_function):
     """
     **DEPRECATED**
     Please use the module function ``make_cost_matrix()``.

     Forwards both arguments unchanged to ``munkres.make_cost_matrix`` and
     returns its result.
     """
     from munkres import make_cost_matrix as module_make_cost_matrix
     return module_make_cost_matrix(profit_matrix, inversion_function)
Exemple #18
0
def ngramset_edit_distance(set1, set2):
    """Compare two n-gram sets through a maximum-weight Hungarian matching.

    The weight matrix comes from ``_ngram_matrix(set1, set2)``.

    Returns:
        A 5-tuple ``(edit_distance, variance, None, None, distances)``:
        the mean of the matched weights divided by 100, the variance of the
        matched weights, two placeholders that are always ``None``, and for
        every matched pair the signed offset ``row - column``.
    """
    def get_yxgraph_distance(x, y):
        # Signed offset between the two indexes: the int 0 on the diagonal,
        # a float everywhere else.
        return 0 if x == y else float(x - y)

    matrix = _ngram_matrix(set1, set2)

    # sys.maxsize instead of sys.maxint, which does not exist on Python 3.
    cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
    m = Munkres()
    indexes = m.compute(cost_matrix)

    max_matrix = []
    xygraph_distance_list = []
    for row, column in indexes:
        max_matrix.append(matrix[row][column])
        xygraph_distance_list.append(get_yxgraph_distance(row, column))
    edit_distance = numpy.mean(max_matrix) / 100
    variance = numpy.var(max_matrix)

    return edit_distance, variance, None, None, xygraph_distance_list
Exemple #19
0
def perform_alignment(ref_instances, sys_instances, kernel, maximize=True):
    """Align system instances with reference instances via Munkres.

    ``kernel(ref, sys)`` must return a ``(similarity, components)`` pair; a
    similarity equal to ``DISALLOWED`` forbids the pair.

    Args:
        ref_instances: reference instances (matrix columns).
        sys_instances: system instances (matrix rows).
        kernel: pairwise scoring callable.
        maximize: when true the similarities are inverted so that Munkres
            maximizes them; otherwise they are used as costs directly.

    Returns:
        ``(correct_detects, missed_detects, false_alarms)`` as lists of
        ``AlignmentRecord``: aligned pairs, unaligned references and
        unaligned system instances.
    """
    blocked_pairs = set()
    max_sim = 0
    sim_matrix, component_matrix = [], []
    for s_i, s in enumerate(sys_instances):
        sim_row, comp_row = [], []
        for r_i, r in enumerate(ref_instances):
            sim, comp = kernel(r, s)
            sim_row.append(sim)
            comp_row.append(comp)

            if sim == DISALLOWED:
                blocked_pairs.add((s_i, r_i))
            elif sim > max_sim:
                max_sim = sim

        sim_matrix.append(sim_row)
        component_matrix.append(comp_row)

    # Disallowed pairs get a cost above every allowed one.
    ceiling = max_sim + 1

    def _mapper(sim):
        if sim == DISALLOWED:
            return ceiling
        return ceiling - sim if maximize else sim

    matrix = make_cost_matrix(sim_matrix, _mapper)

    correct_detects = []
    unmapped_sys = set(range(len(sys_instances)))
    unmapped_ref = set(range(len(ref_instances)))
    if matrix:
        for s_i, r_i in Munkres().compute(matrix):
            # Munkres may still select a forbidden pair; skip it.
            if (s_i, r_i) in blocked_pairs:
                continue

            unmapped_sys.remove(s_i)
            unmapped_ref.remove(r_i)
            correct_detects.append(
                AlignmentRecord(ref_instances[r_i], sys_instances[s_i],
                                sim_matrix[s_i][r_i],
                                component_matrix[s_i][r_i]))

    missed_detects = [
        AlignmentRecord(ref_instances[r_i], None, None, None)
        for r_i in unmapped_ref
    ]
    false_alarms = [
        AlignmentRecord(None, sys_instances[s_i], None, None)
        for s_i in unmapped_sys
    ]

    return (correct_detects, missed_detects, false_alarms)
def test_profit():
    """The maximum-profit assignment of the 5x5 integer matrix totals 392."""
    import sys
    profit_matrix = [
        [94, 66, 100, 18, 48],
        [51, 63, 97, 79, 11],
        [37, 53, 57, 78, 28],
        [59, 43, 97, 88, 48],
        [52, 19, 89, 60, 60],
    ]
    # Munkres minimizes, so invert the profits first.
    cost_matrix = munkres.make_cost_matrix(
        profit_matrix, lambda value: sys.maxsize - value)
    chosen = m.compute(cost_matrix)
    profit = sum(profit_matrix[row][column] for row, column in chosen)
    assert_equals(profit, 392)
def max_match(matrix):
    """Return the total weight of a maximum-weight one-to-one matching.

    Weights are inverted as ``100.0 - weight`` so that Munkres, which
    minimizes, picks the heaviest pairing.
    """
    inverted = make_cost_matrix(matrix, lambda weight: 100.0-weight)
    pairs = Munkres().compute(inverted)
    return sum(matrix[row][column] for row, column in pairs)
Exemple #22
0
def filter_rects(all_rects, threshold, input_rects=None, max_threshold=1.0, config=None):
    """Takes in all_rects and based on the threshold carries out the stitching process
    as described in the paper.

    Args:
        all_rects: grid of rectangle lists, indexed ``all_rects[i][j]``.
        threshold: only rectangles with ``confidence`` above this value are
            considered.
        input_rects: optional list of already accepted rectangles. When
            given, it is extended in place and returned; when omitted, a
            fresh list is used for every call.
        max_threshold: matched rectangles with ``confidence`` above this
            value are rejected.
        config: mapping providing ``"grid_height"`` and ``"grid_width"``;
            despite the ``None`` default it has to be supplied.

    Returns:
        The list of accepted rectangles.
    """
    # A fresh list per call: a mutable default would be shared, so results
    # of one call would leak into the next.
    accepted_rects = [] if input_rects is None else input_rects

    for i in range(config["grid_height"]):
        for j in range(config["grid_width"]):
            current_rects = [r for r in all_rects[i][j] if r.confidence > threshold]
            # Accepted rectangles overlapping at least one current one.
            relevant_rects = [
                other for other in accepted_rects
                if any(other.overlaps(current) for current in current_rects)
            ]

            if not relevant_rects or not current_rects:
                accepted_rects += current_rects
                continue

            # Cost of pairing current rect c with accepted rect a: overlap
            # is rewarded, distance is the tie-breaker.
            matrix = []
            for c in current_rects:
                row = []
                for a in relevant_rects:
                    cost = 1000
                    if a.overlaps(c):
                        cost -= 100
                    cost += a.distance(c) / 1000.0
                    row.append(cost)
                matrix.append(row)

            indices = Munkres().compute(matrix)

            bad = set()
            for row, column in indices:
                c = current_rects[row]
                a = relevant_rects[column]
                if c.confidence > max_threshold:
                    bad.add(row)
                    continue
                if c.overlaps(a):
                    if c.confidence > a.confidence and c.iou(a) > 0.7:
                        # The current rect replaces the accepted one.
                        c.true_confidence = a.confidence
                        accepted_rects.remove(a)
                    else:
                        bad.add(row)

            accepted_rects.extend(
                rect for k, rect in enumerate(current_rects) if k not in bad)

    return accepted_rects
Exemple #23
0
def compute_matches(affinity_scores, j1_pts, j2_pts):
    """Pair entries of ``j1_pts`` and ``j2_pts`` by maximum total affinity.

    Args:
        affinity_scores: 2-D array; rows index ``j1_pts``, columns index
            ``j2_pts``.
        j1_pts: items belonging to the rows.
        j2_pts: items belonging to the columns.

    Returns:
        A list of ``(j1_pt, j2_pt, affinity)`` for every assigned pair whose
        affinity exceeds ``CONFIG.match_confidence_threshold``.
    """
    match_confidence_threshold = CONFIG.match_confidence_threshold
    # Unpacking doubles as a check that the score array is 2-D.
    j1_count, j2_count = affinity_scores.shape
    cost_matrix = make_cost_matrix(affinity_scores.tolist(),
                                   inversion_function=lambda x : 2 - x)
    assignment = MUNKRES_INSTANCE.compute(cost_matrix)

    return [
        (j1_pts[row], j2_pts[col], affinity_scores[row,col])
        for row, col in assignment
        if affinity_scores[row,col] > match_confidence_threshold
    ]
Exemple #24
0
 def generate_transformed_matrix(self):
     """Reorder the columns of the transposed ``self.mat`` by best matching.

     A maximum-weight assignment between rows and columns of the transposed
     matrix is computed with Munkres; column ``i`` of the result is the
     column assigned to the ``i``-th matched row.

     Returns:
         A new float array with the shape of the transposed matrix.
         Columns without an assignment stay zero.
     """
     confusion = self.mat.T
     # Invert against the largest entry. The previous constant,
     # ``sys.long_info.sizeof_digit``, only exists on Python 2 and is
     # unrelated to the matrix contents.
     ceiling = confusion.max() if confusion.size else 0
     cost_matrix = munkres.make_cost_matrix(
         confusion, lambda count: ceiling - count)
     indexes = munkres.Munkres().compute(cost_matrix)

     new_mat = np.zeros(confusion.shape)
     for i, (_, column) in enumerate(indexes):
         new_mat[:, i] = confusion[:, column]
     return new_mat
Exemple #25
0
def return_munkres_result(matrix):
    """Return the values of ``matrix`` picked by a maximum-weight assignment.

    Cells equal to ``DISALLOWED`` stay disallowed; all other cells are
    inverted before Munkres, which minimizes, is run.

    Returns:
        The selected matrix values, in the order Munkres reports the pairs.
    """
    # NOTE(review): ``sys.maxsize - value`` is exact for integers only;
    # confirm the matrix does not hold floats.
    def to_cost(value):
        if value != DISALLOWED:
            return sys.maxsize - value
        return DISALLOWED

    cost_matrix = make_cost_matrix(matrix, to_cost)
    assignment = Munkres().compute(cost_matrix)
    return [matrix[row][column] for row, column in assignment]
Exemple #26
0
def show_csv_file(request, csv_id):
    """Render the best mentor/mentee pairing for a stored CSV file.

    The text of the ``CsvFile`` with id ``csv_id`` is read as
    ``mentor,mentee,utility`` lines; the first line and lines of at most one
    character are skipped. Pairs missing from the file are DISALLOWED.
    Munkres picks the pairing with the highest total utility.

    Returns:
        The rendered ``show_table.html`` with the matched
        ``[mentor, mentee, utility]`` rows sorted by mentor, plus the sets
        of unmatched mentors and mentees.
    """
    data = CsvFile.objects.filter(id=csv_id).first().text.strip()

    # Parse every data line once.
    records = []
    for line in data.splitlines()[1:]:
        if len(line) <= 1:
            continue
        print(line)
        mentor, mentee, utility = [field.strip() for field in line.split(',')]
        records.append((mentor, mentee, utility))

    # Sorted names define the row/column order of the matrix.
    mentor_names = sorted({mentor for mentor, _, _ in records})
    mentee_names = sorted({mentee for _, mentee, _ in records})
    mentor_row = {name: row for row, name in enumerate(mentor_names)}
    mentee_column = {name: column for column, name in enumerate(mentee_names)}

    matrix = [[DISALLOWED] * len(mentee_names) for _ in mentor_names]
    for mentor, mentee, utility in records:
        matrix[mentor_row[mentor]][mentee_column[mentee]] = float(utility)

    cost_matrix = make_cost_matrix(
        matrix, lambda cost: (200 - cost)
        if (cost != DISALLOWED) else DISALLOWED)
    assignment = Munkres().compute(cost_matrix)

    table = []
    used_mentors = set()
    used_mentees = set()
    for row, column in assignment:
        mentor = mentor_names[row]
        mentee = mentee_names[column]
        table.append([mentor, mentee, matrix[row][column]])
        used_mentors.add(mentor)
        used_mentees.add(mentee)
    table.sort(key=lambda entry: entry[0])

    context = {
        'table': table,
        'unused_mentors': set(mentor_names) - used_mentors,
        'unused_mentees': set(mentee_names) - used_mentees
    }
    return render(request, 'show_table.html', context=context)
def compute_agreements(similarity):
    """Return the mean similarity of the best one-to-one row/column mapping.

    The similarities are inverted (``1 - value``) into costs, Munkres picks
    the cheapest mapping, and the similarities recovered from the selected
    costs are summed and divided by the number of rows. Progress and the
    result are printed.
    """
    import munkres
    import numpy as np

    solver = munkres.Munkres()
    print("Computing mapping...")
    costs = munkres.make_cost_matrix(similarity, lambda value: 1 - value)
    pairs = solver.compute(costs)

    recovered = [1 - costs[r][c] for r, c in pairs]
    agreement = np.sum(recovered) / len(costs)
    print("Agreement:", agreement)
    return agreement
Exemple #28
0
def test_profit_float():
    """The maximum-profit assignment of the 5x5 float matrix totals 392.65."""
    profit_matrix = [[94.01, 66.02, 100.03, 18.04, 48.05],
                     [51.06, 63.07, 97.08, 79.09, 11.1],
                     [37.11, 53.12, 57.13, 78.14, 28.15],
                     [59.16, 43.17, 97.18, 88.19, 48.2],
                     [52.21, 19.22, 89.23, 60.24, 60.25]]
    # Invert against the largest profit. ``sys.maxsize - <float>`` rounds to
    # 2**63 for every cell here, which makes all costs equal; the assignment
    # then degenerates to the diagonal (sum 362.65) instead of the real
    # optimum.
    max_profit = max(max(row) for row in profit_matrix)
    cost_matrix = munkres.make_cost_matrix(
        profit_matrix, lambda profit: max_profit - profit)
    indices = m.compute(cost_matrix)
    profit = sum([profit_matrix[row][column] for row, column in indices])
    # Optimum: 94.01 + 63.07 + 78.14 + 97.18 + 60.25
    assert profit == pytest.approx(392.65)
def calculateSimilarity(sent1, sent2):
    """Score two sentences by matching their tokens with Munkres.

    Both sentences are lower-cased and tokenized; punctuation tokens and
    tokens missing from ``model.vocab`` are dropped. Every token pair is
    weighted with ``100 * JSD(model[t1], model[t2])`` and Munkres selects
    the one-to-one pairing with the largest total weight.

    Returns:
        The mean weight of the selected pairs divided by 100, or 0 when
        either sentence has no usable token.
    """
    def usable_tokens(sentence):
        # Case correction, tokenization, punctuation removal, vocabulary
        # filter.
        tokens = word_tokenize(sentence.lower())
        tokens = [tok for tok in tokens if tok not in string.punctuation]
        return [tok for tok in tokens if tok in model.vocab]

    tokens1 = usable_tokens(sent1)
    tokens2 = usable_tokens(sent2)
    if not tokens1 or not tokens2:
        return 0

    solver = Munkres()
    pairMatrix = [
        [100 * JSD(model[t1], model[t2]) for t2 in tokens2]
        for t1 in tokens1
    ]
    cost_matrix = make_cost_matrix(pairMatrix, lambda weight: 100 - weight)
    indexes = solver.compute(cost_matrix)

    total = sum(pairMatrix[row][column] for row, column in indexes)
    return total / len(indexes) / 100
Exemple #30
0
def KM_mapping(REQUESTS, VEHICLES, request_selected, vehicle, current_time):
    """Match rows to columns of the ``cal_profit`` matrix with Munkres.

    Cells with a profit of 0 are DISALLOWED; every other cell is inverted
    so that Munkres, which minimizes, maximizes the profit.

    Returns:
        ``(indexes, total)``: the selected (row, column) pairs and the sum
        of their profits.
    """
    # NOTE(review): ``maxsize - item`` is exact for integers only; confirm
    # that cal_profit does not return floats.
    profit_matrix = cal_profit(REQUESTS, VEHICLES, request_selected, vehicle,
                               current_time)

    def to_cost(item):
        if item != 0:
            return maxsize - item
        return DISALLOWED

    km_weights = make_cost_matrix(profit_matrix, to_cost)
    solver = Munkres()
    assignment = solver.compute(km_weights)
    total = sum(profit_matrix[row][column] for row, column in assignment)
    return assignment, total
Exemple #31
0
def compute_agreements(similarity):
    """Return the mean similarity of the best one-to-one row/column mapping.

    The similarities are inverted (``1 - value``) into costs, Munkres picks
    the cheapest mapping, and the similarities recovered from the selected
    costs are summed and divided by the number of rows. Progress and the
    result are printed.
    """
    import munkres
    import numpy as np

    solver = munkres.Munkres()
    print("Computing mapping...")
    costs = munkres.make_cost_matrix(similarity, lambda value: 1 - value)
    pairs = solver.compute(costs)

    recovered = [1 - costs[r][c] for r, c in pairs]
    agreement = np.sum(recovered) / len(costs)
    print("Agreement:", agreement)
    return agreement
Exemple #32
0
def test_profit():
    """The maximum-profit assignment of the 5x5 integer matrix totals 392."""
    import sys
    profit_matrix = [
        [94, 66, 100, 18, 48],
        [51, 63, 97, 79, 11],
        [37, 53, 57, 78, 28],
        [59, 43, 97, 88, 48],
        [52, 19, 89, 60, 60],
    ]
    # Munkres minimizes, so invert the profits first.
    cost_matrix = munkres.make_cost_matrix(
        profit_matrix, lambda value: sys.maxsize - value)
    chosen = m.compute(cost_matrix)
    profit = sum(profit_matrix[row][column] for row, column in chosen)
    assert_equals(profit, 392)
def calculateIdealComposition(lineup, composition):
    """Pair entries of ``lineup`` with entries of ``composition``.

    The score matrix comes from ``buildMatrix(lineup, composition)``; every
    score is converted to the integer cost ``int(100 - score * 100)`` and
    Munkres selects the cheapest one-to-one assignment.

    Returns:
        A list of ``(lineup_item, composition_item)`` tuples.
    """
    solver = Munkres()

    scores = buildMatrix(lineup, composition)
    costs = munkres.make_cost_matrix(
        scores, lambda score: int(100 - (score * 100)))

    return [(lineup[row], composition[column])
            for row, column in solver.compute(costs)]
Exemple #34
0
def cluster_assignment(id2label_1, id2label_2):
    """
    Map cluster names of the first clusterisation onto cluster names of the
    second one with the Hungarian algorithm, maximising the number of shared
    ids between matched clusters.

    Only ids present in both dicts are considered. The two clusterisations may
    have a different number of clusters.

    Arguments:
        id2label_1: dict of id to label from first clusterisation
        id2label_2: dict of id to label from second clusterisation

    Returns:
        dict: how to transform first labels to second. Keys and values are
        entries of the fitted encoders' ``classes_`` arrays. Empty when the
        two dicts share no ids.
    """
    # Keep only the ids that were clustered in both runs.
    shared_ids = [news_id for news_id in id2label_1 if news_id in id2label_2]
    if not shared_ids:
        return {}
    labels_1 = [id2label_1[news_id] for news_id in shared_ids]
    labels_2 = [id2label_2[news_id] for news_id in shared_ids]

    n_clusters_1 = len(set(labels_1))
    n_clusters_2 = len(set(labels_2))

    # Encode labels so they would be from 0 to max
    l_encoder_1 = LabelEncoder().fit(labels_1)
    labels_1_transf = l_encoder_1.transform(labels_1)
    l_encoder_2 = LabelEncoder().fit(labels_2)
    labels_2_transf = l_encoder_2.transform(labels_2)

    # Contingency table: matrix[i][j] counts ids in cluster i of the first
    # clusterisation and cluster j of the second one.
    matrix = [[0] * n_clusters_2 for _ in range(n_clusters_1)]
    for code_1, code_2 in zip(labels_1_transf, labels_2_transf):
        matrix[code_1][code_2] += 1

    # Munkres minimises, so invert the counts to maximise the overlap.
    cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
    indices = Munkres().compute(cost_matrix)

    # Transform labels back. ``classes_`` is indexed directly because
    # ``inverse_transform`` requires a 1-D array and rejects a bare scalar.
    transform = {}
    for code_1, code_2 in indices:
        transform[l_encoder_1.classes_[code_1]] = l_encoder_2.classes_[code_2]

    return transform
Exemple #35
0
    def _find_best_permutation(self, spectral, spatial, idx_constant):
        '''Find the best alignment of classes between spectral and spatial model.

        Args:
            spectral (np.array): Conditional log-likelihood of the spectral
                                 model (as in Eq.19 in [1]), shape (N,T,F)
            spatial (np.array): Conditional log-likelihood of the spatial
                                model (as in Eq.19 in [1]), shape (N,T,F)
            idx_constant (tuple or int): axes along which the permutation is
                constant
                Examples:
                    idx_constant = (1,2)
                        -> one permutation shared by all time frames and
                           frequency bins (a single global permutation)
                    idx_constant = 1
                        -> one permutation shared by all time frames
                           (a separate permutation per frequency)

        Returns:
            permutations (dict): maps (time index, frequency index) tuples to
                                 the best permutation. Along constant axes the
                                 only index present is 0.
                Examples:
                    permutations = {(0,0) : [2, 0, 1]}
                        -> one global permutation (idx_constant = (1,2))
                        -> spectral comp. 0 corresponds to spatial comp. 2
                        -> spectral comp. 1 corresponds to spatial comp. 0
                        -> spectral comp. 2 corresponds to spatial comp. 1

        [1] Integration of variational autoencoder and spatial clustering
            for adaptive multi-channel neural speech separation;
            K. Zmolikova, M. Delcroix, L. Burget, T. Nakatani, J. Cernocky
        '''
        axes = (idx_constant, ) if isinstance(idx_constant, int) \
            else idx_constant
        # The pairwise sum below has two leading class axes, so every
        # requested axis moves one position to the right.
        reduce_axes = tuple(axis + 1 for axis in axes)

        pairwise = spectral[:, None, :, :] + spatial[None, :, :, :]
        scores = logsumexp(pairwise, axis=reduce_axes)
        # Re-insert the reduced axes with length 1 so scores stays 4-D.
        scores = np.expand_dims(scores, reduce_axes)

        permutations = {}
        for t_idx, f_idx in np.ndindex(scores.shape[-2:]):
            assignment = Munkres().compute(
                make_cost_matrix(scores[:, :, t_idx, f_idx]))
            by_row = sorted(assignment, key=lambda pair: pair[0])
            permutations[t_idx, f_idx] = [col for _, col in by_row]

        return permutations
Exemple #36
0
def matching(male_distances):
    """Compute the maximum-total pairing for ``male_distances``.

    Each value is inverted to the cost ``1.0 - value`` so that the Hungarian
    algorithm, which minimises, maximises the summed original values. The
    total of the selected values is printed.

    Args:
        male_distances: 2-D list-like of scores, indexed ``[row][column]``.

    Returns:
        dict: maps each assigned column index to a one-element list holding
        the row index it was paired with.
    """
    cost_matrix = munkres.make_cost_matrix(male_distances,
                                           lambda cost: 1.0 - cost)
    indices = Munkres().compute(cost_matrix)

    total = 0.0
    pairings = {}
    for row, column in indices:
        total += male_distances[row][column]
        pairings[column] = [row]
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('total profit=%f' % total)
    return pairings
Exemple #37
0
  def symmetric_matching(table):
    """Match clusters one-to-one so that the total number of shared elements
    is as large as possible.

    >>> symmetric_matching([[2, 2, 1], [1, 0, 2]])
    [(0, 0), (1, 2)]
    >>> symmetric_matching([[3, 2, 1], [2, 0, 1]])
    [(0, 0), (1, 2)]
    """
    # Subtracting from the largest cell turns the counts into
    # non-negative costs for the minimising solver.
    largest = max(it.chain.from_iterable(table))

    def invert(count):
        return largest - count

    return Munkres().compute(make_cost_matrix(table, invert))
def make_match(advertisers, persons, ctrfunc = funky_ctr):
    """Find the advertiser/person assignment with the highest total CTR.

    Returns:
        tuple: ``(match, total_ctr)``. Each element of ``match`` is a
        two-element pair whose first item indexes ``advertisers`` and whose
        second item indexes ``persons``; ``total_ctr`` is the sum of the CTR
        matrix cells selected by ``match``.
    """
    ctr_matrix = make_ctr_matrix(advertisers, persons, ctrfunc)

    def to_cost(ctr):
        # Subtract from a larger value so that minimising cost maximises CTR.
        return sys.maxsize - ctr

    match = Munkres().compute(make_cost_matrix(ctr_matrix, to_cost))

    # Look every matched pair up in the original CTR matrix.
    total_ctr = sum(ctr_matrix[adv_idx][person_idx]
                    for adv_idx, person_idx in match)

    return match, total_ctr
Exemple #39
0
def do_hungarian_assignment(dict_a, dict_b, cost_func, yield_condt, cost_matrix_func=None):
    """Yield optimally assigned item pairs for every key shared by both dicts.

    For each key present in ``dict_a`` and ``dict_b`` a matrix of
    ``cost_func(a, b)`` values is built from the two item sequences and solved
    with the module-level ``MUNKR`` solver.

    Args:
        dict_a, dict_b: mappings from key to an indexable sequence of items.
        cost_func: callable ``(a, b) -> value`` that fills the matrix.
        yield_condt: predicate on the raw matrix value; a pair is yielded only
            when it returns a truthy result.
        cost_matrix_func: optional inversion function handed to
            ``make_cost_matrix`` (for maximum-similarity matching). When
            ``None`` the raw matrix is used as the cost matrix.

    Yields:
        tuple: ``(item_from_a, item_from_b)`` for each accepted assignment.
    """
    for key in dict_a.keys():
        if key not in dict_b:
            continue
        items_a = dict_a[key]
        items_b = dict_b[key]

        values = [[cost_func(a, b) for b in items_b] for a in items_a]
        if cost_matrix_func is None:
            costs = values
        else:
            costs = make_cost_matrix(values, cost_matrix_func)

        for row, col in MUNKR.compute(costs):
            # The condition is tested on the raw value, not on the cost.
            if not yield_condt(values[row][col]):
                continue
            yield items_a[row], items_b[col]
Exemple #40
0
def assignDuties(dayA, dayB):
    """Pair each duty of ``dayA`` with a duty of ``dayB``, maximising similarity.

    Args:
        dayA: sequence of duties for day 1.
        dayB: sequence of duties for day 2; must have the same length.

    Returns:
        list: one ``("Driver i", ("Day 1 duty: i", "Day 2 duty: j"))`` tuple
        per assignment, with 1-based numbering.

    Raises:
        SystemExit: with status 1 (after printing a message) when the two days
            hold a different number of duties.
    """
    import sys

    n_duties = len(dayA)
    if n_duties != len(dayB):
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Illegal!! number of duties should be equal every day")
        sys.exit(1)

    # Fill the matrix with the similarities
    matrix = createMatrix(n_duties)
    for i in range(n_duties):
        for j in range(n_duties):
            matrix[i][j] = calcSimilarity(dayA[i], dayB[j])

    # Invert so the solver finds the maximum sum rather than the minimum.
    # All similarities are in [0, 1], so 2 is larger than every entry.
    cost_matrix = make_cost_matrix(matrix, lambda cost: 2 - cost)
    # indexes holds the assignment as (row, column) pairs
    indexes = Munkres().compute(cost_matrix)

    return [
        ("Driver " + str(row + 1),
         ("Day 1 duty: " + str(row + 1), "Day 2 duty: " + str(column + 1)))
        for row, column in indexes
    ]
def get_best_matching(source_corpus, target_corpus, scores):
    """Match source and target URLs optimally and report the accuracy.

    ``scores`` is inverted (``1 - score``) into a cost matrix and solved with
    the Hungarian algorithm. A matched pair counts as correct when both URLs
    are equal after ``LanguageStripper.strip``. The summary line is written to
    stderr; nothing is returned.

    Args:
        source_corpus: mapping whose keys are the source URLs, in row order.
        target_corpus: mapping whose keys are the target URLs, in column order.
        scores: 2-D score matrix indexed ``[source_row][target_column]``.
    """
    stripper = LanguageStripper()

    m = munkres.Munkres()
    cost_matrix = munkres.make_cost_matrix(scores, lambda cost: 1 - cost)
    indexes = m.compute(cost_matrix)

    # Materialise the keys once: views are not indexable on Python 3, and on
    # Python 2 ``keys()`` would rebuild the whole list on every iteration.
    source_urls = list(source_corpus.keys())
    target_urls = list(target_corpus.keys())

    err = 0
    for row, column in indexes:
        s_url = source_urls[row]
        t_url = target_urls[column]
        if stripper.strip(t_url) != stripper.strip(s_url):
            err += 1

    n = min(len(source_corpus), len(target_corpus))
    # Report a real percentage (0-100) and avoid dividing by zero.
    percent = 100. * (n - err) / n if n else 0.
    sys.stderr.write("Correct: %d out of %d = %f%%\n" %
                     (n - err, n, percent))
Exemple #42
0
def hungarianAssignment(cases, controls, numberOfControlsPerCase):
    """Select controls for the cases with the Hungarian algorithm.

    One matrix row is built per control. Its columns are the control's
    ``relatedTo`` value for every case (0 when the case is absent), repeated
    ``numberOfControlsPerCase`` times so each case can receive that many
    controls. Values are inverted to ``convFactor - value * convFactor``
    before solving.

    Args:
        cases: sequence of case objects, used as keys into ``relatedTo``.
        controls: sequence of control objects exposing ``relatedTo`` (a
            mapping) and ``id``.
        numberOfControlsPerCase: how many times the case columns are repeated.

    Returns:
        list: ``id`` of every control assigned to a cell whose cost is below
        ``convFactor``, i.e. whose ``relatedTo`` value is positive.
    """
    selectedControls = []
    convFactor = 100000.00

    relatedness = []
    for control in controls:
        per_case = [control.relatedTo.get(case, 0) for case in cases]
        # Repeat the case columns once per allowed control slot.
        relatedness.append(per_case * numberOfControlsPerCase)

    matrix = munkres.make_cost_matrix(
        relatedness, lambda cost: convFactor - cost * convFactor)
    indexes = Munkres().compute(matrix)

    total = 0
    for row, column in indexes:
        value = matrix[row][column]
        total += value
        if value < convFactor:
            selectedControls.append(controls[row % len(controls)])

    print("\tassignment kic score: %f" % (float(len(indexes) * convFactor - total) / convFactor))
    print("\tall kic score: %f" % (kicScore(cases, selectedControls)))
    return [person.id for person in selectedControls]
def calc_hungarian_alignment_score(s, t):
    """Calculate the alignment score between the two texts s and t
    using the implementation of the Hungarian alignment algorithm
    provided in https://pypi.python.org/pypi/munkres/.

    Both texts are tokenised with ``get_tokenized_lemmas``; every token pair
    is scored with ``compute_paraphrase_score`` and the best one-to-one
    alignment is selected.

    Returns:
        tuple: ``(indexes, score)`` where ``indexes`` is the list of
        ``(row, column)`` pairs chosen by the solver and ``score`` is the
        summed pair score divided by the smaller of the two token counts.
    """
    s_toks = get_tokenized_lemmas(s)
    t_toks = get_tokenized_lemmas(t)

    df = pd.DataFrame(index=s_toks, columns=t_toks, data=0.)

    for c in s_toks:
        for a in t_toks:
            # ``.ix`` was removed from pandas; ``.loc`` is the label-based
            # equivalent.
            df.loc[c, a] = compute_paraphrase_score(c, a)

    matrix = df.values
    cost_matrix = make_cost_matrix(matrix, lambda cost: _max_ppdb_score - cost)

    indexes = _munk.compute(cost_matrix)
    total = 0.0
    for row, column in indexes:
        total += matrix[row][column]

    # The solver always returns as many pairs as the smaller dimension, so
    # dividing by it normalises the score.
    return indexes, total / float(np.min(matrix.shape))
    def linkTemplates(self, sentence):
        """Associate quantity templates with mention templates for a sentence.

        A square matrix of scaled probabilities (``prob * 1000``) is filled
        from ``templates.featureVectors``, padded with zeros up to the larger
        of the two template counts, and solved with the Hungarian algorithm.
        Matched pairs whose scaled probability is at least 500 are passed to
        ``self.linkQuantityAndMention``. Returns nothing.
        """
        templates = sentence.templates
        qTemplateList = templates.getList(self.quantityType)
        mTemplateList = templates.getList(self.mentionType)

        nQuantities = len(qTemplateList)
        nMentions = len(mTemplateList)
        if nQuantities == 0 or nMentions == 0:
            return

        # Pad to a square matrix; padded cells keep probability 0.
        maxSize = max(nQuantities, nMentions)
        probMatrix = [[0] * maxSize for _ in range(maxSize)]
        for fv in templates.featureVectors:
            probMatrix[fv.valueId][fv.mentionId] = fv.prob * 1000

        costMatrix = munkres.make_cost_matrix(
            probMatrix, lambda scaled: 1000 - scaled)
        for qIdx, mIdx in munkres.Munkres().compute(costMatrix):
            # Ignore assignments that involve a padding row or column.
            if qIdx >= nQuantities or mIdx >= nMentions:
                continue
            scaled = probMatrix[qIdx][mIdx]
            if scaled < 500:
                continue
            # this quantity and mention should be associated
            self.linkQuantityAndMention(qTemplateList[qIdx],
                                        mTemplateList[mIdx],
                                        float(scaled) / 1000)
# Flatten the LDA label map into a 1-D vector of 145*145 labels.
lda=np.reshape(lda,[145*145])

# Append one dummy sample for each label 0..16 to every label vector, so that
# each of the 17 label values occurs at least once in every vector.
# NOTE(review): presumably this keeps the contingency matrices below at a
# fixed 17x17 shape - confirm that no clusterer emits labels outside 0..16.
gmm_vote=np.append(gmm,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])
lda_vote=np.append(lda,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])
cmeans_vote=np.append(cmeans,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])
kmeans_vote=np.append(kmeans,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])
isodata_vote=np.append(isodata,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])
svm_vote=np.append(svm,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16])

# Construct label maps with K-Means as the baseline.

# Label map between K-Means and LDA: maximise the contingency counts of the
# matched label pairs (Munkres minimises, hence the inversion).
matrix=contingency_matrix(kmeans_vote,lda_vote)
matrix = matrix.tolist()
cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
m = Munkres()
indexes = m.compute(cost_matrix)
total = 0
# newlabelA collects the matched column index of each (row, column) pair, in
# the order the solver returns them.
newlabelA=[]
for row, column in indexes:
    value = matrix[row][column]
    total += value
    newlabelA.append(column)
    
# Label map between K-Means and the Gaussian Mixture Model (same procedure).
matrix=contingency_matrix(kmeans_vote,gmm_vote)
matrix = matrix.tolist()
cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
m = Munkres()
indexes = m.compute(cost_matrix)
Exemple #46
0
# 15x15 profit matrix; the script finds the assignment with the largest total.
matrix3 = [[7, 53, 183, 439, 863, 497, 383, 563, 79, 973, 287, 63, 343, 169, 583],
           [627, 343, 773, 959, 943, 767, 473, 103, 699, 303, 957, 703, 583, 639, 913],
           [447, 283, 463, 29, 23, 487, 463, 993, 119, 883, 327, 493, 423, 159, 743],
           [217, 623, 3, 399, 853, 407, 103, 983, 89, 463, 290, 516, 212, 462, 350],
           [960, 376, 682, 962, 300, 780, 486, 502, 912, 800, 250, 346, 172, 812, 350],
           [870, 456, 192, 162, 593, 473, 915, 45, 989, 873, 823, 965, 425, 329, 803],
           [973, 965, 905, 919, 133, 673, 665, 235, 509, 613, 673, 815, 165, 992, 326],
           [322, 148, 972, 962, 286, 255, 941, 541, 265, 323, 925, 281, 601, 95, 973],
           [445, 721, 11, 525, 473, 65, 511, 164, 138, 672, 18, 428, 154, 448, 848],
           [414, 456, 310, 312, 798, 104, 566, 520, 302, 248, 694, 976, 430, 392, 198],
           [184, 829, 373, 181, 631, 101, 969, 613, 840, 740, 778, 458, 284, 760, 390],
           [821, 461, 843, 513, 17, 901, 711, 993, 293, 157, 274, 94, 192, 156, 574],
           [34, 124, 4, 878, 450, 476, 712, 914, 838, 669, 875, 299, 823, 329, 699],
           [815, 559, 813, 459, 522, 788, 168, 586, 966, 232, 308, 833, 251, 631, 107],
           [813, 883, 451, 509, 615, 77, 281, 613, 459, 205, 380, 274, 302, 35, 805]]

# Munkres minimises, so invert profits into costs. ``sys.maxsize`` is used
# because ``sys.maxint`` no longer exists on Python 3.
costmat = make_cost_matrix(matrix3, lambda cost: sys.maxsize - cost)

m = Munkres()
indexes = m.compute(costmat)
tot = 0

# Sum the original profits of the selected cells.
for row, col in indexes:
    tp = matrix3[row][col]
    tot += tp


# Parenthesised single-argument prints behave identically on Python 2 and 3.
print(indexes)
print(tot)
    def linkTemplates(self, sentence):
        """Link outcome measurements to (group, outcome) pairs using the
        Hungarian matching algorithm.

        Steps:
          1. Build the candidate (group, outcome) pairs, skipping pairs for
             which ``self.groupOutcomeOverlap`` does not return False.
          2. Collect outcome numbers and textual event rates that are not yet
             paired. Drop outcome numbers for which no event rate can be
             calculated. Merge an outcome number with an event rate when each
             is the other's only compatible candidate.
          3. Score every (outcome measurement, pair) combination, pad the
             matrix to a square, and solve the assignment problem.
          4. Link assignments whose probability exceeds the threshold via
             ``self.linkOutcomeMeasurementAssociations``; record every other
             real outcome measurement in ``self.incompleteMatches`` keyed by
             abstract.

        Args:
            sentence: sentence object; this method reads its ``templates``,
                ``abstract`` and ``index`` attributes.

        Returns:
            None. Returns early when there are no outcome numbers/event rates
            or no usable (group, outcome) pairs.
        """
        templates = sentence.templates
        onList = templates.getList('on')
        erList = templates.getList('eventrate')
        outcomeMeasurements = templates.getOutcomeMeasurementList()
        groupList = sentence.abstract.entities.lists['group']
        outcomeList = sentence.abstract.entities.lists['outcome']

        nON = len(onList)
        nER = len(erList)
        nGroups = len(groupList)
        nOutcomes = len(outcomeList)
        nGroupOutcomePairs = nGroups * nOutcomes
        if (nON + nER) == 0 or nGroupOutcomePairs == 0:
            return  # missing key information cannot make any associations

        goPairs = []
        for group in groupList:
            for outcome in outcomeList:
                # NOTE(review): the result of partialSetMatch is not used; the
                # call is retained in case it has side effects - confirm and
                # remove if it is pure.
                group.partialSetMatch(outcome)

                if self.groupOutcomeOverlap(group, outcome) == False:
                    # Original author's note: overlap between group/outcome
                    # may be no more than ONE word and this may be no more
                    # than 1/3 of smaller mention.
                    goPairs.append((group, outcome))
                else:
                    # Single formatted argument prints the same text on
                    # Python 2 and Python 3.
                    print('%s #### skipping: %s ; %s' % (
                        sentence.abstract.id,
                        group.rootMention().name,
                        outcome.rootMention().name))

        nGroupOutcomePairs = len(goPairs)
        if nGroupOutcomePairs == 0:
            return  # missing key information cannot make any associations

        # get unmatched event rates and outcome numbers
        unmatchedON = []
        unmatchedER = []
        for om in outcomeMeasurements:
            on = om.getOutcomeNumber()
            er = om.getTextEventRate()
            if er is not None and on is None:
                # unmatched event rate
                unmatchedER.append(er)
            elif on is not None and er is None:
                # unmatched number of outcomes
                unmatchedON.append(on)

        # identify as of yet unmatched event rates and outcome numbers that
        # could potentially match each other
        erMatches = {}
        onMatches = {}
        for on in unmatchedON:
            onMatches[on] = []
        for er in unmatchedER:
            erMatches[er] = []

        for on in unmatchedON:
            couldCalculateER = False
            if on.hasAssociatedGroupSize():
                calculatedER = on.eventRate()
                couldCalculateER = True
                for er in unmatchedER:
                    if er.equivalentEventRates(calculatedER):
                        onMatches[on].append(er)
                        erMatches[er].append(on)
            else:
                for group in groupList:
                    groupFV = on.getMatchFeatures(group)
                    if groupFV is not None and groupFV.prob > 0:
                        # it is possible to associate with this group
                        gs = group.getSize(sentenceIndex=sentence.index)
                        if gs > 0:
                            calculatedER = on.eventRate(groupSize=gs)
                            couldCalculateER = True
                            for er in unmatchedER:
                                if er.equivalentEventRates(calculatedER):
                                    onMatches[on].append(er)
                                    erMatches[er].append(on)
            if not couldCalculateER:
                # No event rate can be derived from this outcome number, so
                # its measurement carries no usable information.
                outcomeMeasurements.remove(on.outcomeMeasurement)

        # merge outcome numbers and event rates that uniquely match each other
        for on, candidates in onMatches.items():
            if len(candidates) == 1 and len(erMatches[candidates[0]]) == 1:
                # this outcome number is a potential match for only one event
                # rate; similarly, the event rate is only a match for this
                # outcome number. Assume they belong to the same outcome
                # measurement.
                # Use the matched event rate itself: previously a stale ``er``
                # left over from the loops above was merged instead.
                er = candidates[0]
                erOM = er.outcomeMeasurement
                on.outcomeMeasurement.addEventRate(er)
                outcomeMeasurements.remove(erOM)

        nOutcomeMeasurements = len(outcomeMeasurements)
        maxSize = max(nOutcomeMeasurements, nGroupOutcomePairs)

        # initialize probability matrix for matching outcome measurements with
        # group,outcome pairs; probabilities are scaled to integers
        probMatrix = []
        probMultiplier = 100000

        for omIdx in range(maxSize):
            probMatrix.append([])
            for goIdx in range(maxSize):
                if omIdx < nOutcomeMeasurements and goIdx < nGroupOutcomePairs:
                    om = outcomeMeasurements[omIdx]
                    (group, outcome) = goPairs[goIdx]
                    er = om.getTextEventRate()
                    on = om.getOutcomeNumber()

                    if er is not None:
                        outcomeFV = er.getMatchFeatures(outcome)
                        groupFV = er.getMatchFeatures(group)
                        if outcomeFV is None or groupFV is None:
                            # this quantity has no chance of being associated
                            # with either the group or outcome mention. This
                            # can happen if all mentions for the entity appear
                            # in a sentence after the quantity.
                            probG_ER = 0
                            probO_ER = 0
                        else:
                            probO_ER = outcomeFV.prob
                            probG_ER = groupFV.prob
                    else:
                        probG_ER = 1
                        probO_ER = 1

                    if on is not None:
                        # this outcome measurement has an outcome number.
                        # Is this number useful? Can we compute an event rate
                        # for this group? If not, discard this measurement
                        # (set probability to zero). If so, is the event rate
                        # compatible with the textual event rate? If not,
                        # discard.
                        gs = group.getSize(sentenceIndex=sentence.index)
                        outcomeFV = on.getMatchFeatures(outcome)
                        groupFV = on.getMatchFeatures(group)
                        if outcomeFV is None or groupFV is None:
                            # this quantity has no chance of being associated
                            # with either the group or outcome mention
                            probG_ON = 0
                            probO_ON = 0
                        else:
                            probO_ON = outcomeFV.prob
                            probG_ON = groupFV.prob

                        if on.hasAssociatedGroupSize() == False:
                            # there is no group size already associated with
                            # the outcome number. Does the group have a group
                            # size? If so, is the resulting event rate
                            # compatible with the text one?
                            if gs <= 0 and er is None:
                                # there is no way to compute an event rate
                                # with this outcome measurement. It does not
                                # add any useful information; discard it by
                                # setting probability to zero.
                                probG_ON = 0
                                probO_ON = 0
                            elif gs > 0:
                                # the proposed group has an associated size,
                                # so we can compute an event rate for this
                                # group/outcome
                                calculatedER = on.eventRate(groupSize=gs)
                                if (er is not None and er.equivalentEventRates(calculatedER) == False) or abs(calculatedER) > 1:
                                    # event rates are incompatible
                                    probG_ON = 0
                                    probO_ON = 0
                    else:
                        probG_ON = 1
                        probO_ON = 1

                    if er is not None and on is not None:
                        # both quantities present: geometric mean of the two
                        probG_OM = math.sqrt(probG_ER * probG_ON)
                        probO_OM = math.sqrt(probO_ER * probO_ON)
                    elif er is not None:
                        probG_OM = probG_ER
                        probO_OM = probO_ER
                    else:
                        # only the outcome number is available
                        probG_OM = probG_ON
                        probO_OM = probO_ON

                    prob = round(probG_OM * probO_OM * probMultiplier)
                else:
                    # padding cell: dummy outcome measurement or dummy pair
                    prob = 0

                probMatrix[omIdx].append(prob)

        costMatrix = munkres.make_cost_matrix(probMatrix, lambda cost: probMultiplier - cost)
        m = munkres.Munkres()
        indices = m.compute(costMatrix)
        # Minimum scaled probability an assignment must exceed to be linked.
        threshold = 0.0001 * probMultiplier

        for omIdx, goIdx in indices:
            if omIdx < nOutcomeMeasurements and goIdx < nGroupOutcomePairs:
                prob = probMatrix[omIdx][goIdx]
                if prob > threshold:
                    # this quantity and mention should be associated
                    prob = float(prob) / probMultiplier
                    om = outcomeMeasurements[omIdx]
                    (group, outcome) = goPairs[goIdx]
                    self.linkOutcomeMeasurementAssociations(om, group, outcome, prob)
            # record those outcome measurements that were not successfully
            # matched to a (group, outcome) pair
            if omIdx < nOutcomeMeasurements and (goIdx >= nGroupOutcomePairs or probMatrix[omIdx][goIdx] <= threshold):
                om = outcomeMeasurements[omIdx]
                prob = float(probMatrix[omIdx][goIdx])/probMultiplier
                if goIdx < nGroupOutcomePairs:
                    (group, outcome) = goPairs[goIdx]
                else:
                    group = None
                    outcome = None
                abstract = sentence.abstract
                if abstract not in self.incompleteMatches:
                    self.incompleteMatches[abstract] = []
                self.incompleteMatches[abstract].append(baseassociator.OutcomeMeasurementAssociation(group, outcome, om, prob))
Exemple #48
0
def optimizeMatrix():
    """Return the maximum-profit assignment for the module-level ``matrix``.

    Returns:
        list: ``(row, column)`` pairs chosen by the Hungarian algorithm.
    """
    # ``sys.maxsize`` replaces ``sys.maxint``, which was removed in Python 3.
    cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
    return Munkres().compute(cost_matrix)
Exemple #49
0
def costify(similarity_matrix):
    """Convert a similarity matrix into a cost matrix (each cell becomes 1 - s)."""

    def invert(similarity):
        return 1 - similarity

    return munkres.make_cost_matrix(similarity_matrix, invert)
        t_idx = np.argmax(score_matrix[s_idx])
        if targets[s_idx, t_idx] > 0:
            correct.append(s_idx)
        else:
            errors.append(s_idx)
    total = len(correct) + len(errors)
    print "Right: %d/%d = %f%%, Wrong: %d/%d = %f%%" \
        % (len(correct), total, 100. * len(correct) / total,
           len(errors), total, 100. * len(errors) / total)

    sys.exit()

    print "Finding best matching"
    m = munkres.Munkres()
    correct, errors = [], []
    cost_matrix = munkres.make_cost_matrix(score_matrix, lambda cost: 1 - cost)
    indexes = m.compute(cost_matrix)
    for row, column in indexes:
        if targets[row, column] > 0:
            correct.append((row, column))
        else:
            errors.append((row, column))
    total = len(correct) + len(errors)
    print "Right: %d/%d = %f%%, Wrong: %d/%d = %f%%" \
        % (len(correct), total, 100. * len(correct) / total,
           len(errors), total, 100. * len(errors) / total)

    # scores = cross_validation.cross_val_score(
    #     clf, m, targets, cv=5, scoring=scoring)
    # print sum(predicted), sum(predicted - targets)
    # print metrics.classification_report(targets, predicted)
# Fit a 3-component, full-covariance mixture on three iris features and
# cross-tabulate the predicted component against the true type.
# NOTE(review): if ``sklmix`` is ``sklearn.mixture``, ``GMM`` was removed in
# scikit-learn 0.20 in favour of ``GaussianMixture`` - confirm the target version.
gmm_model = sklmix.GMM(n_components=3, covariance_type='full')
gmm_model.fit(iris[['PW', 'PL', 'SW']])
yhat = gmm_model.predict(iris[['PW', 'PL', 'SW']])
crosstab = pd.crosstab(iris['Type'], yhat, rownames=['true'], colnames=['predicted'])
# Parenthesised single-argument print works on Python 2 and 3.
print(crosstab)

# <headingcell level=4>

# Align the confusion matrix with a non-standard package

# <codecell>

import munkres
import sys
# Find the assignment of predicted components to true types that maximises the
# crosstab counts (Munkres minimises, hence the inversion).
# ``sys.maxsize`` replaces ``sys.maxint``, which no longer exists on Python 3.
m = munkres.Munkres()
cost = munkres.make_cost_matrix(crosstab.values.tolist(), lambda x: sys.maxsize - x)
align = m.compute(cost)
# Same output as the Python 2 statement ``print align, '\n'``.
print('%s \n' % (align,))

# permute[i] is the predicted component matched to row i of the crosstab;
# argsort inverts that mapping so predictions can be relabelled by lookup.
permute = [x[1] for x in align]
new_label = np.argsort(permute)
yhat_new = new_label[yhat]
print(pd.crosstab(iris['Type'], yhat_new, rownames=['true'], colnames=['predicted']))

# <headingcell level=4>

# Bridging the gap with Rpy2

# <codecell>

from rpy2.robjects import r
Exemple #52
0
from munkres import Munkres, print_matrix, make_cost_matrix
import sys

# Read lines with ``raw_input`` on Python 2 and ``input`` on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

# First line: two integers; the first is the number of matrix rows to read.
n, m = map(int, read_line().split())
# ``list(...)`` is required because ``map`` is lazy on Python 3.
matrix = [list(map(int, read_line().split())) for _ in range(n)]

# Munkres minimises, so invert the values to get the maximum-sum assignment.
# ``sys.maxsize`` replaces ``sys.maxint``, which was removed in Python 3.
cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxsize - cost)
m = Munkres()
indexes = m.compute(cost_matrix)

# Print each selected cell as "row,column".
for i, j in indexes:
  print('%d,%d' % (i, j))
Exemple #53
0
def cost_matrix(contingency_table):
    """Invert a contingency table so the Hungarian method can maximise it.

    The Hungarian method searches for a minimum-cost assignment, whereas the
    goal for a contingency table is the maximum. Each cell is therefore
    replaced by ``sys.maxsize - cell`` (an additive inversion).
    """

    def invert(count):
        return sys.maxsize - count

    return make_cost_matrix(contingency_table, invert)
    def linkTemplatesHungarian(self, sentence):
        """ link group size and group templates using Hungarian matching algorithm """
        #    print 'linking all templates'
        templates = sentence.templates
        costValueList = templates.getList('cost_value')
        outcomeList = templates.getList('outcome')
        groupList = templates.getList('group')

        nCostValues = len(costValueList)
        nGroupOutcomePairs = len(outcomeList)*len(groupList)

        if nGroupOutcomePairs == 0 or nCostValues == 0:
            return

        maxSize = max(nCostValues, nGroupOutcomePairs)

        # build list of group-outcome pairs
        goPairs = []
        for group in groupList:
            for outcome in outcomeList:
                goPairs.append((group, outcome))

        # initialize cost matrix for matching cost values with group,outcome pairs
        probMatrix = []
        probMultiplier = 100000

        for cvIdx in range(maxSize):
            probMatrix.append([])
            for goIdx in range(maxSize):
                if cvIdx < nCostValues and goIdx < nGroupOutcomePairs:
                    cv = costValueList[cvIdx]
                    (group, outcome) = goPairs[goIdx]

                    outcomeFV = cv.getMatchFeatures(outcome)
                    groupFV = cv.getMatchFeatures(group)
                    groupProb = groupFV.prob
                    outcomeProb = outcomeFV.prob
                    prob = round(groupProb * outcomeProb * probMultiplier)
                else:
                    # no association can be made
                    # this possible match involves either a dummy cost value or a dummy (group,outcome) pair
                    prob = 0

                probMatrix[cvIdx].append(prob)

        costMatrix = munkres.make_cost_matrix(probMatrix, lambda cost: probMultiplier - cost)
        m = munkres.Munkres()
        #    print probMatrix
        #    print costMatrix
        indices = m.compute(costMatrix)
        # threshold is (1/2)^4
        # threshold = 0.0625 * probMultiplier
        threshold = 0.0001 * probMultiplier
        #    threshold = 0.25 * probMultiplier

        for cvIdx, goIdx in indices:
            if cvIdx < nCostValues and goIdx < nGroupOutcomePairs:
                prob = probMatrix[cvIdx][goIdx]
                if prob > threshold:
                    # this quantity and mention should be associated
                    prob = float(prob) / probMultiplier
                    om = costValueList[cvIdx]
                    (group, outcome) = goPairs[goIdx]
                    self.linkOutcomeMeasurementAssociations(om, group, outcome, prob)