Beispiel #1
0
 def school_year_to_dataset(school_year) -> Dataset:
     """
     Turn the marks of a school year into one ranking of students per class.

     ``school_year.marks`` is read as a 2-D array indexed ``marks[student, class]``.
     For each class (column), the students whose mark is ``>= 0`` are ordered by
     decreasing mark, and students sharing the same mark end up in the same bucket.
     Marks below 0 are skipped (presumably they encode "no mark" -- TODO confirm).

     :param school_year: object exposing a 2-D ``marks`` array
     :return: a Dataset built from the list of per-class rankings; each ranking is
     a list of buckets of student indices
     """
     marks = school_year.marks
     nb_students, nb_classes = marks.shape[0], marks.shape[1]

     rankings = []
     for class_idx in range(nb_classes):
         column = marks[:, class_idx]
         # (mark, student) pairs, best mark first. Tuples are compared as a whole,
         # so equal marks are ordered by decreasing student index.
         graded = [(column[student], student)
                   for student in range(nb_students)
                   if column[student] >= 0]
         graded.sort(reverse=True)

         ranking = []
         bucket = []
         current_mark = None
         for mark, student in graded:
             if bucket and mark == current_mark:
                 bucket.append(student)
                 continue
             # A new mark starts a new bucket; flush the previous one if any.
             if bucket:
                 ranking.append(bucket)
             current_mark, bucket = mark, [student]
         # Flush the last bucket. A class without any valid mark yields [[]].
         ranking.append(bucket)
         rankings.append(ranking)
     return Dataset(rankings)
Beispiel #2
0
 def __init__(self, dataset_folder: str,
              dataset_selector: DatasetSelector = None):
     """
     Load the datasets of a folder and keep those accepted by a selector.

     :param dataset_folder: path of the folder passed to Dataset.get_datasets_from_folder
     :param dataset_selector: selector used to filter the loaded datasets. When None,
     a DatasetSelector(0, inf, 0, inf) is used (presumably "no restriction" -- TODO
     confirm the meaning of the four bounds)
     """
     super().__init__()
     self._dataset_selector = dataset_selector
     if dataset_selector is None:
         unbounded = float('inf')
         self._dataset_selector = DatasetSelector(0, unbounded, 0, unbounded)
     self._datasets = []
     select = self._dataset_selector.select_datasets
     self._datasets = select(Dataset.get_datasets_from_folder(dataset_folder))
Beispiel #3
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Compute a single consensus ranking without ties by sorting the elements with
        RepeatChoice.__compare over their positions in the randomly shuffled input rankings.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: not used here, exactly one ranking is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: not used by this implementation
        :type bench_mode: bool
        :return a Consensus holding one ranking in which every bucket contains a single element
        :raise ScoringSchemeNotHandledException when the dataset is incomplete and the scoring scheme is
        not relevant for incomplete rankings
        """
        incomplete = not dataset.is_complete
        if incomplete and not self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme):
            raise ScoringSchemeNotHandledException

        # With this particular scoring scheme the unified rankings are used instead of the raw ones.
        unifying_penalties = [[0, 1, 1, 0, 1, 1],
                              [1, 1, 0, 1, 1, 0]]
        if scoring_scheme.is_equivalent_to(unifying_penalties):
            rankings_to_use = dataset.unified_rankings()
        else:
            rankings_to_use = dataset.rankings

        nb_rankings = len(rankings_to_use)
        shuffled_rankings = list(rankings_to_use)
        shuffle(shuffled_rankings)

        # element -> array giving, for each shuffled ranking, the index of the bucket
        # containing the element (-1 when the element is absent from that ranking)
        bucket_of = {}
        for ranking_idx, ranking in enumerate(shuffled_rankings):
            for bucket_idx, bucket in enumerate(ranking):
                for element in bucket:
                    if element not in bucket_of:
                        bucket_of[element] = zeros(nb_rankings, dtype=int) - 1
                    bucket_of[element][ranking_idx] = bucket_idx

        ordered = sorted(bucket_of.items(), key=cmp_to_key(RepeatChoice.__compare))
        linear_order = [[pair[0]] for pair in ordered]

        return Consensus(
            consensus_rankings=[linear_order],
            dataset=dataset,
            scoring_scheme=scoring_scheme,
            att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()})
Beispiel #4
0
    def compute_frontiers(self, dataset: Dataset,
                          scoring_scheme: ScoringScheme) -> OrderedPartition:
        """
        Compute an ordered partition of the elements of the dataset.

        The groups start as the components returned by the graph of elements, taken in the
        order the graph returns them. Two consecutive groups are merged as soon as one pair
        (x, y), with x in the first group and y in the second, is not a robust arc.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :return a list of sets of elements such that any exact consensus respects this partitioning
        """
        sc = asarray(scoring_scheme.penalty_vectors)

        # Bidirectional mapping between elements and dense integer ids (first seen, first numbered).
        elem_id = {}
        id_elements = {}
        for ranking in dataset.rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        new_id = len(elem_id)
                        elem_id[element] = new_id
                        id_elements[new_id] = element

        positions = dataset.get_positions(elem_id)
        gr1, _mat_score, robust_arcs = self.__graph_of_elements(positions, sc)
        partition = [set(scc) for scc in gr1.components()]

        i = 0
        while i < len(partition) - 1:
            set1 = partition[i]
            set2 = partition[i + 1]
            fusion = any((x, y) not in robust_arcs for x in set1 for y in set2)
            if fusion:
                set1.update(set2)
                partition.pop(i + 1)
                # The merged group must be compared again with its predecessor, and with its
                # new successor when it is the first group. The lower bound is therefore 0:
                # a bound of 1 would never re-check the pair of groups (0, 1) after a merge.
                i = max(i - 1, 0)
            else:
                i += 1

        # Translate the ids back to the original elements.
        res = [{id_elements[elem] for elem in group} for group in partition]
        return OrderedPartition(res)
Beispiel #5
0
    def __departure_rankings(
            self, dataset: Dataset, positions: ndarray, elements_id: Dict,
            scoring_scheme: ScoringScheme) -> Tuple[ndarray, ndarray]:
        """
        Build the starting rankings (as position vectors) and their Kemeny scores.

        Without starting algorithms, the departures are the distinct unified rankings of the
        dataset plus one extra ranking where all the elements are tied in a single bucket.
        Otherwise, each starting algorithm provides one departure: its first consensus ranking.

        :param dataset: the dataset whose rankings are aggregated
        :param positions: positions of the elements in the rankings; it is transposed so that one
        row is one ranking (assumes shape (nb_elements, nb_rankings), -1 = absent -- TODO confirm)
        :param elements_id: mapping element -> column index in the departure matrix
        :param scoring_scheme: scoring scheme used to compute the Kemeny scores
        :return: (departure, scores) where departure is an int32 matrix with one row per starting
        ranking, and scores is a float64 vector holding the Kemeny score of each row
        """

        def kemeny_score(candidate):
            # Score of one candidate ranking against the original (non unified) rankings.
            return KemenyComputingFactory(scoring_scheme).get_kemeny_score(
                candidate, dataset.rankings)

        dst_ini = []
        rankings_unified = dataset.unified_dataset().rankings

        if len(self.__starting_algorithms) == 0:
            # array() copies, so the in-place updates below do not touch the caller's positions.
            real_pos = array(positions).transpose()
            distinct_rankings = set()
            list_distinct_id_rankings = []

            for i, ranking in enumerate(rankings_unified):
                ranking_array = real_pos[i]
                # Absent elements (-1) are sent after the last position of the ranking.
                ranking_array[ranking_array == -1] = amax(ranking_array) + 1
                # De-duplicate on the raw content. str(ndarray) is abbreviated with "..." for
                # large arrays, so it could wrongly merge two different rankings.
                signature = ranking_array.tobytes()
                if signature in distinct_rankings:
                    continue
                distinct_rankings.add(signature)
                list_distinct_id_rankings.append(i)
                dst_ini.append(kemeny_score(ranking))

            # Extra departure: every element tied in one bucket (last row, left at 0).
            dst_ini.append(kemeny_score([[*elements_id]]))

            departure = zeros(
                (len(list_distinct_id_rankings) + 1, len(elements_id)),
                dtype=int32)
            departure[:-1] = real_pos[asarray(list_distinct_id_rankings)]
        else:
            m = len(self.__starting_algorithms)
            n = len(elements_id)
            departure = zeros((m, n), dtype=int32) - 1
            for id_ranking, algo in enumerate(self.__starting_algorithms):
                cons = algo.compute_consensus_rankings(
                    dataset, scoring_scheme, True).consensus_rankings[0]
                dst_ini.append(kemeny_score(cons))
                for id_bucket, bucket in enumerate(cons):
                    for element in bucket:
                        departure[id_ranking][elements_id.get(
                            element)] = id_bucket

        return departure, array(dst_ini, dtype=float64)
 def _run_raw_data(self) -> str:
     """
     For every dataset, compute the Copeland score of each element on growing
     prefixes of its shuffled rankings.

     The prefix sizes are 10, 20, ..., 90 then 100, 200, ..., 1000. The name of each
     dataset is printed while it is processed.

     :return: one line "<dataset name>;<element>;<list of scores>" per element of
     each dataset, each line terminated by a newline
     """
     sample_sizes = [*range(10, 100, 10), *range(100, 1001, 100)]
     lines = []
     for dataset in self.datasets:
         print(dataset.name)
         scores_by_element = {element: [] for element in dataset.elements}
         # Shuffles whatever list dataset.rankings returns, in place.
         shuffle(dataset.rankings)
         for size in sample_sizes:
             sample = Dataset(dataset.rankings[0:size])
             sample.name = dataset.name
             # NOTE(review): "_scoring_cheme" looks like a typo of "_scoring_scheme";
             # it is defined outside this block, so it is kept as is -- confirm.
             consensus = self._algo.compute_consensus_rankings(sample, self._scoring_cheme, True)
             copeland_scores = consensus.copeland_scores
             for element in sample.elements:
                 scores_by_element[element].append(copeland_scores.get(element))
         lines.extend(
             dataset.name + ";" + str(element) + ";" + str(scores_by_element[element]) + "\n"
             for element in dataset.elements)
     return "".join(lines)
Beispiel #7
0
    def compute_consensus_rankings(
            self,
            dataset: Dataset,
            scoring_scheme: ScoringScheme,
            return_at_most_one_ranking=False,
            bench_mode=False
    ) -> Consensus:
        """
        Return the input ranking(s) having the smallest Kemeny score with respect to the dataset.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: if True, only the first best ranking found is returned;
        otherwise every ranking reaching the minimal score is returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: not used by this implementation
        :type bench_mode: bool
        :return a Consensus holding the list of best rankings and the associated Kemeny score
        :raise InompleteRankingsIncompatibleWithScoringSchemeException when the dataset is incomplete and,
        in one of the two penalty vectors, a penalty at index i (i in 0..2) is greater than the one at index i+3
        """
        penalties = scoring_scheme.penalty_vectors
        if dataset.is_complete:
            rankings_to_use = dataset.rankings
        else:
            for i in range(3):
                if penalties[0][i] > penalties[0][i + 3] or penalties[1][i] > penalties[1][i + 3]:
                    raise InompleteRankingsIncompatibleWithScoringSchemeException
            # Candidates are the unified rankings; scores are still computed against the raw ones.
            rankings_to_use = dataset.unified_rankings()

        kemeny = KemenyComputingFactory(scoring_scheme)
        best_score = float('inf')
        best_rankings = [[]]
        for candidate in rankings_to_use:
            score = kemeny.get_kemeny_score(candidate, dataset.rankings)
            if score < best_score:
                # Strictly better: forget everything kept so far.
                best_score = score
                best_rankings = [candidate]
            elif score == best_score and not return_at_most_one_ranking:
                best_rankings.append(candidate)

        features = {
            ConsensusFeature.KemenyScore: best_score,
            ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
        }
        return Consensus(consensus_rankings=best_rankings,
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att=features)
Beispiel #8
0
def bootstrap_dataset(dataset: Dataset):
    """
    Build a bootstrap replicate of a dataset.

    ``dataset.nb_rankings`` rankings are drawn uniformly at random, with replacement, among
    the rankings of ``dataset``. If the Dataset constructor rejects the sample with an
    EmptyDatasetException, a brand new sample is drawn.

    :param dataset: the dataset to resample
    :return: a new Dataset made of the drawn rankings
    """
    source_rankings = dataset.rankings
    nb_draws = dataset.nb_rankings
    # NOTE(review): if no sample can ever be accepted (e.g. nb_rankings == 0 and Dataset
    # rejects an empty list), this loops forever, as the original code did -- confirm.
    while True:
        # Start from an empty sample at every attempt: rankings of a rejected sample
        # must not leak into the next one, otherwise the replicate gets too large.
        rankings = [choice(source_rankings) for _ in range(nb_draws)]
        try:
            return Dataset(rankings)
        except EmptyDatasetException:
            continue
Beispiel #9
0
    def rankaggr_brute(preferences):
        """
        For each sample, aggregate the preferences of the models into a consensus ranking
        and convert it into one score per candidate.

        ``preferences`` is indexed as ``preferences[sample][model][candidate]``. For every
        sample, each model contributes one ranking whose buckets are the arrays of candidate
        indices sharing the same preference value. The consensus is computed with
        Algorithm.ParCons when the first ranking has more than 5 buckets, with
        Algorithm.Exact otherwise. The candidate heading the bucket of rank ``c`` in the first
        consensus ranking receives the score ``n_candidates - c``.

        :param preferences: preference values, one matrix (models x candidates) per sample
        :return: float32 array of shape (n_samples, n_candidates); candidates that do not head
        a bucket of the consensus keep a score of 0
        """
        n_samples = len(preferences)
        n_model = len(preferences[0])
        n_candidates = len(preferences[0][0])
        scores = np.zeros((n_samples, n_candidates), dtype="f4")

        for sample_idx in range(n_samples):
            sample = preferences[sample_idx]
            # NOTE(review): np.unique(..., axis=1) de-duplicates and sorts whole columns of the
            # sample matrix, not the values of each model's row independently -- confirm intent.
            ordered_values = np.flip(np.unique(sample, axis=1), axis=1)
            profile = [
                [np.where(sample[model] == value)[0] for value in ordered_values[model]]
                for model in range(n_model)
            ]

            ranks = Dataset(profile)
            sc = ScoringScheme()
            algorithm = Algorithm.ParCons if len(profile[0]) > 5 else Algorithm.Exact
            consensus = KemRankAgg.compute_consensus(ranks, sc, algorithm)

            for rank, bucket in enumerate(consensus.consensus_rankings[0]):
                scores[sample_idx][bucket[0]] = n_candidates - rank

        return scores
Beispiel #10
0
 def get_consensus_from_file(path: str):
     """
     Build a Consensus from the rankings of the Dataset constructed with ``path``.

     :param path: value handed to the Dataset constructor (presumably the path of a
     rankings file -- TODO confirm)
     :return: a Consensus wrapping the rankings of that dataset
     """
     rankings = Dataset(path).rankings
     return Consensus(rankings)
Beispiel #11
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> ConsensusSingleRanking:
        """
        Compute a consensus ranking from pairwise duels between elements.

        For each pair of elements, the cost of placing the first one before the second one is
        compared with the cost of placing it after. The cheaper option gives 1 point to the
        corresponding element; equal costs give 0.5 point to both. Elements are then ordered
        with CopelandMethod.sorted_dictionary_keys, and elements with the same score are tied.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: not used here, exactly one ranking is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: not used by this implementation
        :type bench_mode: bool
        :return a ConsensusSingleRanking holding the ranking, the score of each element and its
        [victories, ties, defeats] counters
        """
        penalties = scoring_scheme.penalty_vectors

        # Bidirectional mapping between elements and dense ids (ids start at 0).
        elem_id = {}
        id_elements = {}
        for ranking in dataset.rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        new_id = len(elem_id)
                        elem_id[element] = new_id
                        id_elements[new_id] = element

        positions = dataset.get_positions(elem_id)
        nb_elements = shape(positions)[0]
        nb_rankings = shape(positions)[1]

        # Costs of the three possible relative placements of a pair (el1, el2).
        cost_before = penalties[0]
        cost_tied = penalties[1]
        # "el1 after el2" is "el1 before el2" with the roles of the two elements swapped.
        cost_after = array([cost_before[k] for k in (1, 0, 2, 4, 3, 5)])

        id_scores = {i: 0 for i in range(nb_elements)}
        # per id: [victories, ties, defeats]
        duel_stats = {i: [0, 0, 0] for i in range(nb_elements)}

        for id_el1 in range(nb_elements):
            pos1 = positions[id_el1]
            # rankings where el1 is absent (el2 absent or not)
            absent1 = count_nonzero(pos1 == -1)
            for id_el2 in range(id_el1 + 1, nb_elements):
                pos2 = positions[id_el2]
                # rankings where both el1 and el2 are absent
                both_absent = count_nonzero(pos1 + pos2 == -2)
                # rankings where el1 and el2 have the same position (both absent included)
                same_position = count_nonzero(pos1 == pos2)
                # rankings where el2 is absent (el1 absent or not)
                absent2 = count_nonzero(pos2 == -1)
                # rankings where the position of el1 is lower (el1 absent alone included)
                lower1 = count_nonzero(pos1 < pos2)
                # number of rankings with: el1 before el2, el1 after el2, el1 tied with el2,
                # only el2 absent, only el1 absent, both absent
                relative_positions = array([
                    lower1 - absent1 + both_absent,
                    nb_rankings - lower1 - same_position - absent2 + both_absent,
                    same_position - both_absent,
                    absent2 - both_absent,
                    absent1 - both_absent,
                    both_absent,
                ])
                put_before = vdot(relative_positions, cost_before)
                put_after = vdot(relative_positions, cost_after)
                # Cost of tying el1 and el2: computed as before, but not used by the duel rule.
                put_tied = vdot(relative_positions, cost_tied)

                if put_before < put_after:
                    winner, loser = id_el1, id_el2
                elif put_after < put_before:
                    winner, loser = id_el2, id_el1
                else:
                    for tied_id in (id_el1, id_el2):
                        id_scores[tied_id] += 0.5
                        duel_stats[tied_id][1] += 1
                    continue
                id_scores[winner] += 1
                duel_stats[winner][0] += 1
                duel_stats[loser][2] += 1

        # ids ordered by CopelandMethod.sorted_dictionary_keys (decreasing score, per the
        # original comment -- TODO confirm against that helper)
        sorted_ids = CopelandMethod.sorted_dictionary_keys(id_scores)
        scores_elements = {}
        victories_elements = {}
        consensus_ranking = []
        bucket = []
        previous_score = id_scores.get(sorted_ids[0])
        for current_id in sorted_ids:
            element = id_elements.get(current_id)
            score = id_scores.get(current_id)
            scores_elements[element] = score
            victories_elements[element] = duel_stats.get(current_id)

            # A different score closes the current bucket and opens a new one.
            if score != previous_score:
                consensus_ranking.append(bucket)
                bucket = []
            bucket.append(element)
            previous_score = score
        consensus_ranking.append(bucket)

        features = {
            ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
            ConsensusFeature.CopelandScores: scores_elements,
            ConsensusFeature.CopelandVictories: victories_elements,
        }
        return ConsensusSingleRanking(consensus_rankings=[consensus_ranking],
                                      dataset=dataset,
                                      scoring_scheme=scoring_scheme,
                                      att=features)
Beispiel #12
0
from corankco.dataset import Dataset
from corankco.scoringscheme import ScoringScheme
from corankco.algorithms.algorithmChoice import get_algorithm
from corankco.algorithms.algorithmChoice import Algorithm
from corankco.kemeny_computation import KemenyComputingFactory

# Example script: build a small dataset, pick a scoring scheme, compute a consensus and print it.

# A dataset is a list of rankings; a ranking is a list of buckets; a bucket is a list of tied elements.
dataset = Dataset([[[1], [2, 3]], [[3, 1], [4]], [[1], [5], [3, 2]]])
# Alternatively: d = Dataset.get_rankings_from_file(file_path), where file_path is the path to the file.
# To import a list of datasets stored in the same folder: Dataset.get_rankings_from_folder(path_folder)

# Print information about the dataset.
print(dataset.description())
# Choose your scoring scheme (or sc = ScoringScheme() for the default scoring scheme).
sc = ScoringScheme([[0., 1., 1., 0., 1., 1.], [1., 1., 0., 1., 1., 0.]])

print("scoring scheme : " + str(sc))
# Print the description of the scoring scheme.
print(sc.description())

print("\n### Consensus computation ###\n")

# Select the ParCons algorithm, configured with the parameter "bound_for_exact" set to 90.
algorithm = get_algorithm(alg=Algorithm.ParCons,
                          parameters={"bound_for_exact": 90})
# Compute the consensus ranking(s).
consensus = algorithm.compute_consensus_rankings(
    dataset=dataset, scoring_scheme=sc, return_at_most_one_ranking=False)

print(consensus.description())

# If you want the consensus ranking only: print(consensus)
# To get the consensus rankings: consensus.consensus_rankings
Beispiel #13
0
    def compute_consensus_rankings(
            self,
            dataset: Dataset,
            scoring_scheme: ScoringScheme,
            return_at_most_one_ranking=False,
            bench_mode=False
    ) -> Consensus:
        """
        Compute a consensus by splitting the elements into the components of the graph of
        elements and solving each component separately, in the order the graph returns them.

        A component of size 1 becomes a bucket. A component where, for every pair, tying the two
        elements is never more expensive than ordering them becomes a single bucket. Any other
        component is solved on the rankings projected on its elements: with the exact algorithm
        when its size does not exceed ``self.bound_for_exact``, with ``self.auxiliary_alg``
        otherwise (the result is then flagged as not necessarily optimal).

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: not used here, exactly one ranking is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: is bench mode activated. If False, the partition into components is
        added to the returned information
        :type bench_mode: bool
        :return a Consensus holding one ranking made of the concatenated sub-consensus
        """
        if self.bound_for_exact > 0:
            # Local import, only done when the exact algorithm may be needed.
            from corankco.algorithms.exact.exactalgorithm import ExactAlgorithm

        penalties = asarray(scoring_scheme.penalty_vectors)
        rankings = dataset.rankings

        # Bidirectional mapping between elements and dense integer ids.
        elem_id = {}
        id_elements = {}
        for ranking in rankings:
            for bucket in ranking:
                for element in bucket:
                    if element not in elem_id:
                        new_id = len(elem_id)
                        elem_id[element] = new_id
                        id_elements[new_id] = element

        positions = dataset.get_positions(elem_id)
        gr1, mat_score = self.__graph_of_elements(positions, penalties)
        scc = gr1.components()

        optimal = True
        res = []
        for component in scc:
            if len(component) == 1:
                res.append([id_elements.get(component[0])])
                continue

            # The component is one bucket when no pair has a tie cost (index 2) greater
            # than one of its two other costs (indices 0 and 1).
            all_tied = all(
                not (mat_score[e1][e2][2] > mat_score[e1][e2][0]
                     or mat_score[e1][e2][2] > mat_score[e1][e2][1])
                for e1, e2 in combinations(component, 2))
            if all_tied:
                res.append([id_elements.get(el) for el in component])
                continue

            # Project every ranking on the elements of the component, dropping empty
            # buckets and empty rankings.
            members = set(component)
            project_rankings = []
            for ranking in rankings:
                project_ranking = []
                for bucket in ranking:
                    kept = [elem for elem in bucket if elem_id.get(elem) in members]
                    if kept:
                        project_ranking.append(kept)
                if project_ranking:
                    project_rankings.append(project_ranking)

            if len(component) > self.bound_for_exact:
                # Too large for the exact algorithm: fall back on the auxiliary algorithm.
                cons_ext = self.auxiliary_alg.compute_consensus_rankings(
                    Dataset(project_rankings), scoring_scheme, True).consensus_rankings[0]
                optimal = False
            else:
                cons_ext = ExactAlgorithm(preprocess=False).compute_consensus_rankings(
                    Dataset(project_rankings), scoring_scheme, True).consensus_rankings[0]
            res.extend(cons_ext)

        hash_information = {
            ConsensusFeature.IsNecessarilyOptimal: optimal,
            ConsensusFeature.AssociatedAlgorithm: self.get_full_name(),
        }
        if not bench_mode:
            hash_information[ConsensusFeature.WeakPartitioning] = [
                {id_elements.get(elem) for elem in component} for component in scc]

        return Consensus(consensus_rankings=[res],
                         dataset=dataset,
                         scoring_scheme=scoring_scheme,
                         att=hash_information)
Beispiel #14
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Compute a consensus by sorting the elements by increasing mean position.

        The position of a bucket is its rank (1, 2, 3, ...) when ``self.useBucketIdAndNotBucketSize``
        is true; otherwise it is 1 plus the number of elements placed in the previous buckets.
        The mean of an element is taken over the rankings that contain it. Elements having
        exactly the same mean are tied in the consensus.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: not used here, exactly one ranking is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: not used by this implementation
        :type bench_mode: bool
        :return a Consensus holding a single ranking
        :raise ScoringSchemeNotHandledException when the dataset is incomplete and the scoring scheme is
        not relevant for incomplete rankings
        """
        incomplete = not dataset.is_complete
        if incomplete and not self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme):
            raise ScoringSchemeNotHandledException

        # The unified rankings are used with the two unifying scoring schemes.
        if scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme().penalty_vectors) or \
                scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme_p(0.5).penalty_vectors):
            rankings_to_use = dataset.unified_rankings()
        else:
            rankings_to_use = dataset.rankings

        # element -> [sum of its positions, number of rankings containing it]
        totals = {}
        for ranking in rankings_to_use:
            position = 1
            for bucket in ranking:
                for elem in bucket:
                    stats = totals.setdefault(elem, [0, 0])
                    stats[0] += position
                    stats[1] += 1
                position += 1 if self.useBucketIdAndNotBucketSize else len(bucket)

        means = [(elem, stats[0] * 1.0 / stats[1]) for elem, stats in totals.items()]
        means.sort(key=lambda pair: pair[1])

        consensus = []
        # Positions start at 1, so no mean can be equal to this initial value.
        previous_mean = -1
        for elem, mean in means:
            if mean != previous_mean:
                previous_mean = mean
                consensus.append([])
            consensus[-1].append(elem)

        return Consensus(
            consensus_rankings=[consensus],
            dataset=dataset,
            scoring_scheme=scoring_scheme,
            att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()})
Beispiel #15
0
    def compute_consensus_rankings(self,
                                   dataset: Dataset,
                                   scoring_scheme: ScoringScheme,
                                   return_at_most_one_ranking=False,
                                   bench_mode=False) -> Consensus:
        """
        Compute a consensus by reading all the rankings in parallel, one bucket depth at a time.

        An element is placed in the consensus at the first depth where the number of times it has
        been read reaches ``self.__h`` multiplied by the number of rankings that contain it.
        Elements placed at the same depth are tied. Elements never reaching their threshold are
        not part of the consensus.

        :param dataset: A dataset containing the rankings to aggregate
        :type dataset: Dataset (class Dataset in package 'datasets')
        :param scoring_scheme: The penalty vectors to consider
        :type scoring_scheme: ScoringScheme (class ScoringScheme in package 'distances')
        :param return_at_most_one_ranking: not used here, exactly one ranking is always returned
        :type return_at_most_one_ranking: bool
        :param bench_mode: not used by this implementation
        :type bench_mode: bool
        :return a Consensus holding a single ranking (an empty one if no element could be placed)
        :raise ScoringSchemeNotHandledException when the dataset is incomplete and the scoring scheme is
        not relevant for incomplete rankings
        """
        incomplete = not dataset.is_complete
        if incomplete and not self.is_scoring_scheme_relevant_when_incomplete_rankings(scoring_scheme):
            raise ScoringSchemeNotHandledException

        # The unified rankings are used with the two unifying scoring schemes.
        if scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme().penalty_vectors) or \
                scoring_scheme.is_equivalent_to(ScoringScheme.get_unifying_scoring_scheme_p(0.5).penalty_vectors):
            rankings_to_use = dataset.unified_rankings()
        else:
            rankings_to_use = dataset.rankings

        # Threshold of each element: self.__h per ranking in which it appears.
        nb_rankings_needed = {}
        for ranking in rankings_to_use:
            for bucket in ranking:
                for element in bucket:
                    nb_rankings_needed[element] = nb_rankings_needed.get(element, 0) + self.__h

        times_seen = {}
        already_put = set()
        ranking_res = []

        # zip_longest yields, for each depth, the bucket of every ranking at that depth
        # (None for the rankings that are shorter).
        for buckets_at_depth in zip_longest(*rankings_to_use):
            placed_now = []
            for bucket in buckets_at_depth:
                if bucket is None:
                    continue
                for element in bucket:
                    if element in already_put:
                        continue
                    seen = times_seen.get(element, 0) + 1
                    times_seen[element] = seen
                    if seen >= nb_rankings_needed[element]:
                        placed_now.append(element)
                        already_put.add(element)
            if placed_now:
                ranking_res.append(placed_now)

        rankings_consensus = [ranking_res] if ranking_res else [[]]
        return Consensus(
            consensus_rankings=rankings_consensus,
            dataset=dataset,
            scoring_scheme=scoring_scheme,
            att={ConsensusFeature.AssociatedAlgorithm: self.get_full_name()})