def _run_final_data(self, raw_data: str) -> str:
    """
    Turn the raw per-record results into a table with one row per top-k value.

    The first line of ``raw_data`` is skipped and so is any line of at most one character. Every remaining
    line is split on ";": column 0 is parsed as the b5 value of a scoring scheme, column 3 is a bracketed,
    ", "-separated list of integers (the reference set) and the last column is a ranking parsed with
    ``parse_ranking_with_ties_of_int``.

    Each record is parsed once and then evaluated for every top-k value.

    :param raw_data: the raw output to aggregate, one ";"-separated record per line
    :type raw_data: str
    :return: a header line "k;b5-b4=..." followed, for each top-k value, by a line "top_k;sum;sum;..."
        holding for each scoring scheme the sum of ``evaluate_topk_ranking`` over the matching records
    :rtype: str
    :raise KeyError: if a record carries a b5 value that matches no scoring scheme
    """
    top_k_all = self.__top_k_to_test
    schemes = self.__scoring_schemes

    header = "k" + "".join(";b5-b4=" + str(sc.b5 - sc.b4) for sc in schemes)

    # scores[top_k][b5] -> values returned by evaluate_topk_ranking, in record order
    scores = {top_k: {sc.b5: [] for sc in schemes} for top_k in top_k_all}

    for line in raw_data.split("\n")[1:]:
        if len(line) <= 1:
            continue
        cols = line.split(";")
        b5 = float(cols[0])
        consensus = Consensus([parse_ranking_with_ties_of_int(cols[-1])])
        # column 3 looks like "{1, 2, 3}" / "[1, 2, 3]": strip the delimiters, then split
        reference = {int(elem) for elem in cols[3][1:-1].split(", ")}
        for top_k in top_k_all:
            scores[top_k][b5].append(consensus.evaluate_topk_ranking(reference, top_k=top_k))

    rows = [header]
    for top_k in top_k_all:
        per_scheme = scores[top_k]
        rows.append(
            str(top_k) + "".join(";" + str(np.sum(np.asarray(per_scheme[sc.b5]))) for sc in schemes)
        )
    return "\n".join(rows) + "\n"
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Compute one consensus ranking by delegating to ``prepare_internal_vars`` and ``kwik_sort``.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: not read by this implementation, exactly one ranking is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding a single ranking, tagged with the name of the algorithm
    :rtype: Consensus
    """
    penalties = asarray(scoring_scheme.penalty_vectors)

    # Both lists are handed over empty; `ranking` is returned below, so kwik_sort presumably fills
    # it in place (and prepare_internal_vars does the same for `translated_elements`) - TODO confirm.
    ranking = []
    translated_elements = []
    internal_vars = self.prepare_internal_vars(translated_elements, dataset.rankings)
    self.kwik_sort(ranking, translated_elements, internal_vars, penalties)

    features = {ConsensusFeature.AssociatedAlgorithm: self.get_full_name()}
    return Consensus(
        consensus_rankings=[ranking],
        dataset=dataset,
        scoring_scheme=scoring_scheme,
        att=features,
    )
def __get_consensus_from_files(self, folder_consensus: str):
    """
    Load one stored consensus per (scoring scheme, dataset) pair.

    For each scoring scheme, the list of (dataset, consensus) pairs is rebuilt from scratch. The file of
    a pair is located with ``join_paths(folder_consensus, str(sc.b5), name_file(dataset.name))``.

    :param folder_consensus: root folder handed to ``join_paths``
    :type folder_consensus: str
    """
    for sc in self.__scoring_schemes:
        loaded = []
        # registered before loading, so the entry exists even if a file cannot be read
        self.__consensus[sc] = loaded
        scheme_folder = str(sc.b5)
        for dataset in self._datasets:
            path = join_paths(folder_consensus, scheme_folder, name_file(dataset.name))
            loaded.append((dataset, Consensus.get_consensus_from_file(path)))
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Compute one consensus ranking without ties by sorting the elements with ``RepeatChoice.__compare``.

    The input rankings are copied and shuffled; each element is then described by the vector of the
    bucket indices it occupies in the shuffled rankings (-1 where the element is absent), and the
    elements are sorted on these vectors.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: not read by this implementation, exactly one ranking is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding a single ranking made of singleton buckets
    :rtype: Consensus
    :raise ScoringSchemeNotHandledException: if the dataset is not complete and the scoring scheme is
        not relevant for incomplete rankings
    """
    if not (dataset.is_complete or self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme)):
        raise ScoringSchemeNotHandledException

    needs_unification = scoring_scheme.is_equivalent_to([[0, 1, 1, 0, 1, 1], [1, 1, 0, 1, 1, 0]])
    rankings_to_use = dataset.unified_rankings() if needs_unification else dataset.rankings

    nb_rankings = len(rankings_to_use)
    shuffled = list(rankings_to_use)
    shuffle(shuffled)

    # element -> bucket index in each shuffled ranking, -1 meaning "not in that ranking"
    bucket_ids = {}
    for ranking_idx, ranking in enumerate(shuffled):
        for bucket_idx, bucket in enumerate(ranking):
            for element in bucket:
                if element not in bucket_ids:
                    bucket_ids[element] = zeros(nb_rankings, dtype=int) - 1
                bucket_ids[element][ranking_idx] = bucket_idx

    ordered = sorted(bucket_ids.items(), key=cmp_to_key(RepeatChoice.__compare))
    res = [[element] for element, _ in ordered]

    return Consensus(
        consensus_rankings=[res],
        dataset=dataset,
        scoring_scheme=scoring_scheme,
        att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()},
    )
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Return the candidate ranking(s) with the lowest Kemeny score against the rankings of the dataset.

    The candidates are the rankings of the dataset when it is complete, its unified rankings otherwise.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: if True, only the first candidate reaching the lowest score is kept
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding the best candidate(s) and their Kemeny score
    :rtype: Consensus
    :raise InompleteRankingsIncompatibleWithScoringSchemeException: if the dataset is not complete and,
        for some i in 0..2, entry i of a penalty vector is greater than entry i + 3 of the same vector
    """
    penalties = scoring_scheme.penalty_vectors
    if dataset.is_complete:
        candidates = dataset.rankings
    else:
        incompatible = any(
            penalties[0][i] > penalties[0][i + 3] or penalties[1][i] > penalties[1][i + 3]
            for i in range(3)
        )
        if incompatible:
            raise InompleteRankingsIncompatibleWithScoringSchemeException
        candidates = dataset.unified_rankings()

    kemeny = KemenyComputingFactory(scoring_scheme)
    best_score = float('inf')
    best = [[]]  # stays [[]] when there is no candidate at all
    for candidate in candidates:
        score = kemeny.get_kemeny_score(candidate, dataset.rankings)
        if score < best_score:
            best_score = score
            best = [candidate]
        elif score == best_score and not return_at_most_one_ranking:
            best.append(candidate)

    features = {
        ConsensusFeature.KemenyScore: best_score,
        ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
    }
    return Consensus(consensus_rankings=best, dataset=dataset, scoring_scheme=scoring_scheme, att=features)
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Index the elements of the dataset, build the pairwise cost data and the list of sub-problems,
    then return a Consensus.

    NOTE(review): as written, this body reads ``medianes`` and ``my_prob`` in its return statement
    although neither is assigned in the function; the part that builds and solves the model appears
    to be missing here - confirm against the complete implementation.

    :param dataset: A dataset containing the rankings to aggregate
    :type dataset: Dataset (class Dataset in package 'datasets')
    :param scoring_scheme: The penalty vectors to consider
    :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
    :param return_at_most_one_ranking: the algorithm should not return more than one ranking
        (not read in the visible body)
    :type return_at_most_one_ranking: bool
    :param bench_mode: is bench mode activated. If False, the algorithm may return more information
        (not read in the visible body)
    :type bench_mode: bool
    :return one or more rankings if the underlying algorithm can find several equivalent consensus rankings
    If the algorithm is not able to provide multiple consensus, or if return_at_most_one_ranking is True then, it
    should return a list made of the only / the first consensus found.
    In all scenario, the algorithm returns a list of consensus rankings
    :raise ScoringSchemeNotHandledException when the algorithm cannot compute the consensus because the
    implementation of the algorithm does not fit with the scoring scheme
    """
    rankings = dataset.rankings
    # Give every distinct element an integer id, in order of first appearance,
    # and keep the reverse mapping id -> element.
    elem_id = {}
    id_elements = {}
    id_elem = 0
    for ranking in rankings:
        for bucket in ranking:
            for element in bucket:
                if element not in elem_id:
                    elem_id[element] = id_elem
                    id_elements[id_elem] = element
                    id_elem += 1
    nb_elem = len(elem_id)
    positions = ExactAlgorithmCplex.__positions(rankings, elem_id)
    sc = asarray(scoring_scheme.penalty_vectors)
    graph_elements = Graph()
    # One entry per sub-problem, each entry being a list of element ids.
    # NOTE(review): sub_problems is filled below but not read again in this body.
    sub_problems = []
    if self.__preprocess:
        # With preprocessing: one sub-problem per component of the graph of elements.
        graph_elements, mat_score = self.__graph_of_elements(positions, asarray(sc))
        scc_s = graph_elements.components()
        for scc in scc_s:
            sub_problem = []
            for elem in scc:
                sub_problem.append(elem)
            sub_problems.append(sub_problem)
    else:
        # Without preprocessing: a single sub-problem made of all the element ids.
        mat_score = self.__cost_matrix(positions, asarray(sc))
        sub_problems.append(list(range(nb_elem)))
    # NOTE(review): medianes and my_prob are undefined at this point (see the docstring).
    return Consensus(consensus_rankings=medianes,
                     dataset=dataset,
                     scoring_scheme=scoring_scheme,
                     att={ConsensusFeature.IsNecessarilyOptimal: True,
                          ConsensusFeature.KemenyScore: my_prob.solution.get_objective_value(),
                          ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                          })
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Build one consensus ranking component by component, following the components of the graph of elements.

    A component of size one becomes a singleton bucket. A larger component becomes a single bucket when
    the pairwise scores allow it (see the comment in the body); otherwise the rankings are projected on
    the component and the sub-problem is solved by ``self.auxiliary_alg`` when the component has more than
    ``self.bound_for_exact`` elements, by ``ExactAlgorithm(preprocess=False)`` otherwise.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: not read by this implementation, exactly one ranking is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: if False, the partition of the elements into components is attached to the result
        as ``ConsensusFeature.WeakPartitioning``
    :type bench_mode: bool
    :return: a Consensus holding a single ranking; ``IsNecessarilyOptimal`` is False as soon as one
        component has been delegated to the auxiliary algorithm
    :rtype: Consensus
    """
    if self.bound_for_exact > 0:
        # imported on demand, only when some component may be solved exactly
        from corankco.algorithms.exact.exactalgorithm import ExactAlgorithm

    penalties = asarray(scoring_scheme.penalty_vectors)
    rankings = dataset.rankings

    # integer id per element (order of first appearance) and the reverse mapping
    elem_id = {}
    id_elements = {}
    for ranking in rankings:
        for bucket in ranking:
            for element in bucket:
                if element not in elem_id:
                    new_id = len(elem_id)
                    elem_id[element] = new_id
                    id_elements[new_id] = element

    positions = dataset.get_positions(elem_id)
    graph, mat_score = self.__graph_of_elements(positions, penalties)
    components = graph.components()

    is_optimal = True
    res = []
    for component in components:
        if len(component) == 1:
            res.append([id_elements.get(component[0])])
            continue

        # The whole component is put in one bucket unless, for some pair, entry 2 of the pair's score
        # is greater than entry 0 or entry 1 (entry 2 is presumably the cost of tying the pair - confirm).
        must_be_split = any(
            mat_score[e1][e2][2] > mat_score[e1][e2][0] or mat_score[e1][e2][2] > mat_score[e1][e2][1]
            for e1, e2 in combinations(component, 2)
        )
        if not must_be_split:
            res.append([id_elements.get(el) for el in component])
            continue

        # Project every input ranking on the elements of the component, dropping what becomes empty.
        members = set(component)
        projected_rankings = []
        for ranking in rankings:
            projected = [[elem for elem in bucket if elem_id.get(elem) in members] for bucket in ranking]
            projected = [bucket for bucket in projected if len(bucket) > 0]
            if len(projected) > 0:
                projected_rankings.append(projected)

        if len(component) > self.bound_for_exact:
            solver = self.auxiliary_alg
            is_optimal = False
        else:
            solver = ExactAlgorithm(preprocess=False)
        sub_consensus = solver.compute_consensus_rankings(
            Dataset(projected_rankings), scoring_scheme, True).consensus_rankings[0]
        res.extend(sub_consensus)

    hash_information = {
        ConsensusFeature.IsNecessarilyOptimal: is_optimal,
        ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
    }
    if not bench_mode:
        hash_information[ConsensusFeature.WeakPartitioning] = [
            {id_elements.get(elem) for elem in component} for component in components
        ]
    return Consensus(consensus_rankings=[res],
                     dataset=dataset,
                     scoring_scheme=scoring_scheme,
                     att=hash_information)
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Rank the elements by increasing mean position over the rankings that contain them.

    Within a ranking, the position of a bucket starts at 1 and grows by 1 per bucket when
    ``self.useBucketIdAndNotBucketSize`` is true, by the size of the bucket otherwise. Elements whose
    mean positions are equal end up in the same bucket of the consensus.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: not read by this implementation, exactly one ranking is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding a single ranking, tagged with the name of the algorithm
    :rtype: Consensus
    :raise ScoringSchemeNotHandledException: if the dataset is not complete and the scoring scheme is
        not relevant for incomplete rankings
    """
    if not (dataset.is_complete or self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme)):
        raise ScoringSchemeNotHandledException

    if scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme().penalty_vectors) or \
            scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme_p(0.5).penalty_vectors):
        rankings_to_use = dataset.unified_rankings()
    else:
        rankings_to_use = dataset.rankings

    # element -> [sum of its positions, number of rankings in which it appears]
    stats = {}
    for ranking in rankings_to_use:
        position = 1
        for bucket in ranking:
            for elem in bucket:
                entry = stats.setdefault(elem, [0, 0])
                entry[0] += position
                entry[1] += 1
            position += 1 if self.useBucketIdAndNotBucketSize else len(bucket)

    by_mean = [(elem, entry[0] * 1.0 / entry[1]) for elem, entry in stats.items()]
    by_mean.sort(key=lambda pair: pair[1])

    # Walk the sorted list and open a new bucket every time the mean position changes.
    consensus = []
    current_bucket = []
    previous_mean = -1
    for elem, mean in by_mean:
        if mean != previous_mean:
            previous_mean = mean
            current_bucket = []
            consensus.append(current_bucket)
        current_bucket.append(elem)

    return Consensus(
        consensus_rankings=[consensus],
        dataset=dataset,
        scoring_scheme=scoring_scheme,
        att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()},
    )
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Compute a consensus ranking by solving a 0/1 linear program with PuLP.

    For every ordered pair (i, j) of distinct element ids a binary variable ``x_i_j`` is created, and for
    every pair i < j a binary variable ``t_i_j``; exactly one of ``x_i_j``, ``x_j_i``, ``t_i_j`` is 1.
    The objective is the sum of the variables weighted by entries 0 (for x) and 2 (for t) of
    ``mat_score[i][j]``. The program is solved with CPLEX and, if that attempt raises, with CBC.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: not read by this implementation, exactly one ranking is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding a single ranking, flagged as necessarily optimal, with the value of
        the objective as Kemeny score
    :rtype: Consensus
    """
    rankings = dataset.rankings

    # integer id per element (order of first appearance) and the reverse mapping
    elem_id = {}
    id_elements = {}
    id_elem = 0
    for ranking in rankings:
        for bucket in ranking:
            for element in bucket:
                if element not in elem_id:
                    elem_id[element] = id_elem
                    id_elements[id_elem] = element
                    id_elem += 1
    nb_elem = len(elem_id)

    positions = ExactAlgorithmGeneric.__positions(rankings, elem_id)
    sc = asarray(scoring_scheme.penalty_vectors)
    graph, mat_score, ties_must_be_checked = self.__graph_of_elements(positions, sc)

    my_values = []  # objective coefficient of each variable, same order as my_vars
    my_vars = []
    h_vars = {}     # variable name -> index in my_vars

    def add_binary(name_var, cost):
        # Register a new binary variable together with its objective coefficient.
        h_vars[name_var] = len(my_vars)
        my_values.append(cost)
        my_vars.append(pulp.LpVariable(name_var, 0, 1, cat="Binary"))

    def before(first, second):
        # Variable "x_first_second".
        return my_vars[h_vars["x_%s_%s" % (first, second)]]

    def tied(first, second):
        # Variable "t_a_b" of the pair; tie variables only exist with a < b.
        if first < second:
            return my_vars[h_vars["t_%s_%s" % (first, second)]]
        return my_vars[h_vars["t_%s_%s" % (second, first)]]

    for i in range(nb_elem):
        for j in range(nb_elem):
            if i != j:
                add_binary("x_%s_%s" % (i, j), mat_score[i][j][0])
            if i < j:
                add_binary("t_%s_%s" % (i, j), mat_score[i][j][2])

    prob = pulp.LpProblem("myProblem", pulp.LpMinimize)

    # add the binary order constraints: i before j, j before i, or i tied with j
    for i in range(nb_elem - 1):
        for j in range(i + 1, nb_elem):
            prob += before(i, j) + before(j, i) + tied(i, j) == 1

    # add the transitivity constraints
    for i in range(nb_elem):
        for j in range(nb_elem):
            if j == i:
                continue
            for k in range(nb_elem):
                if k == i or k == j:
                    continue
                prob += before(i, j) + before(j, k) + tied(j, k) - before(i, k) <= 1
                prob += before(i, j) + tied(i, j) + before(j, k) - before(i, k) <= 1
                prob += 2 * tied(i, j) + 2 * tied(j, k) - tied(i, k) <= 3

    # optimization: forbid every tie when ties do not have to be checked
    if not ties_must_be_checked:
        for i in range(nb_elem - 1):
            for j in range(i + 1, nb_elem):
                prob += tied(i, j) == 0

    # the elements of a component are forced before the elements of every later component
    cfc = graph.components()
    for i in range(len(cfc)):
        group_i = cfc[i]
        for j in range(i + 1, len(cfc)):
            for elem_i in group_i:
                for elem_j in cfc[j]:
                    prob += before(elem_i, elem_j) == 1
                    prob += before(elem_j, elem_i) == 0
                    prob += tied(elem_i, elem_j) == 0

    # objective function
    prob += pulp.lpSum(var * value for var, value in zip(my_vars, my_values))

    try:
        prob.solve(pulp.CPLEX(msg=False))
    except Exception:
        # Any failure of the CPLEX attempt falls back on CBC. Unlike a bare "except:",
        # KeyboardInterrupt and SystemExit are no longer swallowed here.
        prob.solve(pulp.PULP_CBC_CMD(msg=False))

    # h_def[e] = number of "x_*_e" variables set to 1, i.e. number of elements placed before e
    h_def = {i: 0 for i in range(nb_elem)}
    for var in my_vars:
        if abs(var.value() - 1) < 0.01 and var.name[0] == "x":
            h_def[int(var.name.split("_")[2])] += 1

    # elements with the same number of predecessors share a bucket
    ranking = []
    current_nb_def = 0
    bucket = []
    for elem, nb_defeats in sorted(h_def.items(), key=itemgetter(1)):
        if nb_defeats == current_nb_def:
            bucket.append(id_elements[elem])
        else:
            ranking.append(bucket)
            bucket = [id_elements[elem]]
            current_nb_def = nb_defeats
    ranking.append(bucket)

    return Consensus(consensus_rankings=[ranking],
                     dataset=dataset,
                     scoring_scheme=scoring_scheme,
                     att={ConsensusFeature.IsNecessarilyOptimal: True,
                          ConsensusFeature.KemenyScore: prob.objective.value(),
                          ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                          })
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Compute consensus rankings with the ``bioconsertinc`` extension, from a set of departure rankings.

    Each departure ranking is a row giving, for every element id, the id of its bucket. After the call
    to the extension, the rows whose distance equals the lowest distance are converted back to rankings
    of elements; identical rows are reported once.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: if True, only the last of the best rows is kept
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding the distinct best rankings and the lowest distance as Kemeny score
    :rtype: Consensus
    """
    penalties = asarray(scoring_scheme.penalty_vectors)
    rankings = dataset.rankings
    nb_rankings = len(rankings)

    # integer id per element (order of first appearance) and the reverse mapping
    elem_id = {}
    id_elements = {}
    for ranking in rankings:
        for bucket in ranking:
            for element in bucket:
                if element not in elem_id:
                    new_id = len(elem_id)
                    elem_id[element] = new_id
                    id_elements[new_id] = element
    nb_elements = len(elem_id)

    positions = BioConsert.__get_positions(rankings, elem_id)
    departure, dst_res = self.__departure_rankings(dataset, positions, elem_id, scoring_scheme)

    # departure_c and dst_res are read back right after the call, so the extension presumably
    # updates both in place - TODO confirm against the extension.
    departure_c = array(departure.flatten(), dtype=int32)
    bioconsertinc.bioconsertinc(
        array(positions.flatten(), dtype=int32),
        departure_c,
        array(penalties[0], dtype=float64),
        array(penalties[1], dtype=float64),
        int32(nb_elements),
        int32(nb_rankings),
        int32(len(departure)),
        dst_res,
    )
    departure = departure_c.reshape(-1, nb_elements)

    lowest_distance = amin(dst_res)
    best_rows = where(dst_res == lowest_distance)[0]
    best_rankings = departure[best_rows].tolist()
    if return_at_most_one_ranking:
        best_rankings = [best_rankings[-1]]

    res = []
    already_seen = set()
    for bucket_ids in best_rankings:
        signature = str(bucket_ids)
        if signature in already_seen:
            continue
        already_seen.add(signature)

        # bucket id -> elements of that bucket; the position in the row is the element id
        buckets = {}
        for el, id_bucket in enumerate(bucket_ids):
            buckets.setdefault(id_bucket, []).append(id_elements.get(el))
        res.append([buckets.get(id_bucket) for id_bucket in range(len(buckets))])

    return Consensus(consensus_rankings=res,
                     dataset=dataset,
                     scoring_scheme=scoring_scheme,
                     att={
                         ConsensusFeature.KemenyScore: lowest_distance,
                         ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                     })
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Read the rankings in parallel, level by level, and output an element once it has been seen often enough.

    The threshold of an element is ``self.__h`` times the number of its occurrences in the rankings.
    At each level (the i-th bucket of every ranking), the elements whose number of sightings reaches
    their threshold are put together in a new bucket of the consensus.

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: not read by this implementation, exactly one ranking is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding a single ranking (an empty one if no element reached its threshold)
    :rtype: Consensus
    :raise ScoringSchemeNotHandledException: if the dataset is not complete and the scoring scheme is
        not relevant for incomplete rankings
    """
    if not (dataset.is_complete or self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme)):
        raise ScoringSchemeNotHandledException

    if scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme().penalty_vectors) or \
            scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme_p(0.5).penalty_vectors):
        rankings_to_use = dataset.unified_rankings()
    else:
        rankings_to_use = dataset.rankings

    # element -> number of sightings required before it is output
    threshold = {}
    for ranking in rankings_to_use:
        for bucket in ranking:
            for element in bucket:
                threshold[element] = threshold.get(element, 0) + self.__h

    sightings = {}
    placed = set()
    ranking_res = []
    # zip_longest pads the shorter rankings with None once they are exhausted
    for level in zip_longest(*rankings_to_use):
        new_bucket = []
        for bucket in level:
            if bucket is None:
                continue
            for element in bucket:
                if element in placed:
                    continue
                sightings[element] = sightings.get(element, 0) + 1
                if sightings[element] >= threshold[element]:
                    new_bucket.append(element)
                    placed.add(element)
        if len(new_bucket) > 0:
            ranking_res.append(new_bucket)

    rankings_consensus = [ranking_res] if len(ranking_res) > 0 else [[]]
    return Consensus(
        consensus_rankings=rankings_consensus,
        dataset=dataset,
        scoring_scheme=scoring_scheme,
        att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()},
    )
def compute_consensus_rankings(
        self,
        dataset: Dataset,
        scoring_scheme: ScoringScheme,
        return_at_most_one_ranking=False,
        bench_mode=False
) -> Consensus:
    """
    Compute the consensus ranking(s) by solving a 0/1 linear program with CPLEX.

    For every ordered pair (i, j) of distinct element ids a binary variable ``x_i_j`` is created, and for
    every pair i < j a binary variable ``t_i_j``; exactly one of ``x_i_j``, ``x_j_i``, ``t_i_j`` is 1.
    The objective is the sum of the variables weighted by entries 0 (for x) and 2 (for t) of
    ``mat_score[i][j]``.

    When preprocessing is enabled, every element of a component of the graph of elements is forced
    before every element of the later components. These constraints are expressed on the element ids
    of the two components (they used to be built from the indices of the components themselves, which
    constrained unrelated variables).

    :param dataset: the rankings to aggregate
    :type dataset: Dataset
    :param scoring_scheme: the penalty vectors to consider
    :type scoring_scheme: ScoringScheme
    :param return_at_most_one_ranking: if True, the model is solved once and a single ranking is returned;
        otherwise the solution pool is populated and one ranking per pooled solution is returned
    :type return_at_most_one_ranking: bool
    :param bench_mode: not read by this implementation
    :type bench_mode: bool
    :return: a Consensus holding the ranking(s) found, flagged as necessarily optimal, with the value of
        the objective as Kemeny score
    :rtype: Consensus
    """
    rankings = dataset.rankings

    # integer id per element (order of first appearance) and the reverse mapping
    elem_id = {}
    id_elements = {}
    id_elem = 0
    for ranking in rankings:
        for bucket in ranking:
            for element in bucket:
                if element not in elem_id:
                    elem_id[element] = id_elem
                    id_elements[id_elem] = element
                    id_elem += 1
    nb_elem = len(elem_id)
    nb_pairs = nb_elem * (nb_elem - 1) // 2

    positions = ExactAlgorithmCplex.__positions(rankings, elem_id)
    sc = asarray(scoring_scheme.penalty_vectors)
    graph_elements = Graph()
    if self.__preprocess:
        graph_elements, mat_score = self.__graph_of_elements(positions, asarray(sc))
    else:
        mat_score = self.__cost_matrix(positions, asarray(sc))

    map_elements_cplex = {}  # index of the variable -> (kind "x" or "t", i, j)
    my_prob = cplex.Cplex()  # initiate
    my_prob.set_results_stream(None)  # mute
    my_prob.parameters.mip.tolerances.mipgap.set(0.000001)
    my_prob.parameters.mip.pool.absgap.set(0.000001)
    my_prob.objective.set_sense(my_prob.objective.sense.minimize)  # we want to minimize the objective function
    if not return_at_most_one_ranking:
        my_prob.parameters.mip.pool.intensity.set(4)
        my_prob.parameters.mip.limits.populate.set(10000000)

    my_obj = []
    my_ub = []
    my_lb = []
    my_names = []
    cpt = 0
    should_consider_ties = False
    for i in range(nb_elem):
        for j in range(nb_elem):
            if i == j:
                continue
            if not should_consider_ties:
                calc = mat_score[i][j][0] + mat_score[i][j][1] - 2 * mat_score[i][j][2]
                # ties are considered as soon as, for one pair, calc is positive, or is (almost)
                # zero while several rankings are requested
                if (-0.00001 <= calc <= 0.00001 and not return_at_most_one_ranking) or calc > 0:
                    should_consider_ties = True
            my_obj.append(mat_score[i][j][0])
            my_ub.append(1.0)
            my_lb.append(0.0)
            my_names.append("x_%s_%s" % (i, j))
            map_elements_cplex[cpt] = ("x", i, j)
            cpt += 1
    for i in range(nb_elem):
        for j in range(i + 1, nb_elem):
            my_obj.append(mat_score[i][j][2])
            my_ub.append(1.0)
            my_lb.append(0.0)
            my_names.append("t_%s_%s" % (i, j))
            map_elements_cplex[cpt] = ("t", i, j)
            cpt += 1
    my_prob.variables.add(obj=my_obj, lb=my_lb, ub=my_ub, types="B" * cpt, names=my_names)

    # rhs = right hand side
    my_rhs = []
    my_rownames = []
    # inequations : E for Equality, G for >= and L for <=
    my_sense = "E" * nb_pairs + "L" * (3 * nb_elem * (nb_elem - 1) * (nb_elem - 2))
    rows = []

    # add the binary order constraints
    count = 0
    for i in range(nb_elem - 1):
        for j in range(i + 1, nb_elem):
            my_rownames.append("c%s" % count)
            count += 1
            my_rhs.append(1)
            rows.append([["x_%s_%s" % (i, j), "x_%s_%s" % (j, i), "t_%s_%s" % (i, j)], [1.0, 1.0, 1.0]])

    # add the transitivity constraints (tie variables only exist with the smaller id first)
    for i in range(nb_elem):
        for j in range(nb_elem):
            if j == i:
                continue
            i_bef_j = "x_%s_%s" % (i, j)
            i_tie_j = "t_%s_%s" % (i, j) if i < j else "t_%s_%s" % (j, i)
            for k in range(nb_elem):
                if k == i or k == j:
                    continue
                j_bef_k = "x_%s_%s" % (j, k)
                i_bef_k = "x_%s_%s" % (i, k)
                j_tie_k = "t_%s_%s" % (j, k) if j < k else "t_%s_%s" % (k, j)
                i_tie_k = "t_%s_%s" % (i, k) if i < k else "t_%s_%s" % (k, i)

                my_rownames.append("c%s" % count)
                my_rhs.append(1)
                count += 1
                rows.append([[i_bef_j, j_bef_k, j_tie_k, i_bef_k], [1., 1., 1., -1.]])

                my_rownames.append("c%s" % count)
                my_rhs.append(1)
                count += 1
                rows.append([[i_bef_j, i_tie_j, j_bef_k, i_bef_k], [1., 1., 1., -1.]])

                my_rownames.append("c%s" % count)
                my_rhs.append(3)
                count += 1
                rows.append([[i_tie_j, j_tie_k, i_tie_k], [2.0, 2.0, -1.0]])

    # optimization: forbid every tie when ties do not have to be considered
    if self.__optimize and not should_consider_ties:
        my_sense += "E" * nb_pairs
        for i in range(nb_elem - 1):
            for j in range(i + 1, nb_elem):
                my_rownames.append("c%s" % count)
                my_rhs.append(0)
                count += 1
                rows.append([["t_%s_%s" % (i, j)], [1.]])

    # preprocessing: the elements of a component are forced before those of every later component
    if self.__preprocess:
        nb_scc_rows = 0
        scc = graph_elements.components()
        for i in range(len(scc) - 1):
            for j in range(i + 1, len(scc)):
                for elem1 in scc[i]:
                    for elem2 in scc[j]:
                        my_rownames.append("c%s" % count)
                        my_rhs.append(1)
                        count += 1
                        # the constraint bears on the two elements, not on the indices of their components
                        rows.append([["x_%s_%s" % (elem1, elem2)], [1.]])
                        nb_scc_rows += 1
        my_sense += "E" * nb_scc_rows

    my_prob.linear_constraints.add(lin_expr=rows, senses=my_sense, rhs=my_rhs, names=my_rownames)

    medianes = []
    if not return_at_most_one_ranking:
        my_prob.populate_solution_pool()
        nb_optimal_solutions = my_prob.solution.pool.get_num()
        for i in range(nb_optimal_solutions):
            names = my_prob.solution.pool.get_values(i)
            medianes.append(
                ExactAlgorithmCplex.__create_consensus(nb_elem, names, map_elements_cplex, id_elements))
    else:
        my_prob.solve()
        x = my_prob.solution.get_values()
        medianes.append(
            ExactAlgorithmCplex.__create_consensus(nb_elem, x, map_elements_cplex, id_elements))

    return Consensus(consensus_rankings=medianes,
                     dataset=dataset,
                     scoring_scheme=scoring_scheme,
                     att={
                         ConsensusFeature.IsNecessarilyOptimal: True,
                         ConsensusFeature.KemenyScore: my_prob.solution.get_objective_value(),
                         ConsensusFeature.AssociatedAlgorithm: self.get_full_name()
                     })