def test_n(self):
        """
        Check the ``n`` attribute of the fixtures ``dataset0`` .. ``dataset7``,
        first on the raw datasets, then on the datasets returned by
        ``Unification.dataset_to_dataset`` and ``Projection.dataset_to_dataset``.
        """
        # Expected values, indexed by fixture number.
        n_raw = (0, 6, 4, 8, 4, 4, 3, 0)
        n_projected = (0, 6, 0, 2, 0, 0, 3, 0)

        for idx, expected in enumerate(n_raw):
            self.assertEqual(getattr(self, "dataset%d" % idx).n, expected)

        # The unified datasets are expected to report the same n as the raw ones.
        for idx, expected in enumerate(n_raw):
            unified = Unification.dataset_to_dataset(
                getattr(self, "dataset%d" % idx))
            self.assertEqual(unified.n, expected)

        for idx, expected in enumerate(n_projected):
            projected = Projection.dataset_to_dataset(
                getattr(self, "dataset%d" % idx))
            self.assertEqual(projected.n, expected)
    def test_m(self):
        """
        Check the ``m`` attribute of the fixtures ``dataset0`` .. ``dataset7``.

        The same expected values are asserted on the raw datasets and on the
        datasets returned by ``Unification.dataset_to_dataset`` and
        ``Projection.dataset_to_dataset``.
        """
        # Expected values, indexed by fixture number.
        expected_m = (3, 2, 3, 3, 4, 4, 3, 0)

        for idx, expected in enumerate(expected_m):
            self.assertEqual(getattr(self, "dataset%d" % idx).m, expected)

        for idx, expected in enumerate(expected_m):
            unified = Unification.dataset_to_dataset(
                getattr(self, "dataset%d" % idx))
            self.assertEqual(unified.m, expected)

        for idx, expected in enumerate(expected_m):
            projected = Projection.dataset_to_dataset(
                getattr(self, "dataset%d" % idx))
            self.assertEqual(projected.m, expected)
Exemple #3
0
def execute_median_rankings_computation_from_datasets(
    datasets,
    algorithm,
    normalization,
    distance,
    precise_time_measurement,
    algorithms=None,
):
    """
    Run every requested algorithm on every dataset and collect the results.

    :param datasets: iterable of datasets exposing ``complete`` and ``rankings``
    :param algorithm: a single algorithm to run, or None; when not None it is
        run after the ones listed in ``algorithms``
    :param normalization: its ``str()`` selects the normalization applied to
        incomplete datasets ("Unification", "Projection", anything else: none)
    :param distance: forwarded to the per-rankings computation
    :param precise_time_measurement: forwarded to the per-rankings computation
    :param algorithms: optional collection of algorithms; it is never modified
    :return: a list holding, for each dataset and then for each algorithm, the
        value returned by ``execute_median_rankings_computation_from_rankings``
    """
    # Work on a copy: appending `algorithm` to the caller's own list would leak
    # into the caller and accumulate across successive calls.
    algorithms_to_run = list(algorithms) if algorithms else []
    if algorithm is not None:
        algorithms_to_run.append(algorithm)

    normalization_name = str(normalization)
    submission_results = []
    for d in datasets:
        # Only incomplete datasets are normalized here.
        if d.complete:
            rankings_real = d.rankings
        elif normalization_name == "Unification":
            rankings_real = Unification.rankings_to_rankings(d.rankings)
        elif normalization_name == "Projection":
            rankings_real = Projection.rankings_to_rankings(d.rankings)
        else:
            rankings_real = d.rankings

        for a in algorithms_to_run:
            submission_results.append(
                execute_median_rankings_computation_from_rankings(
                    rankings=rankings_real,
                    algorithm=a,
                    normalization=normalization,
                    distance=distance,
                    precise_time_measurement=precise_time_measurement,
                    dataset=d,
                ))

    return submission_results
Exemple #4
0
    def compute_median_rankings(
            self,
            rankings: List[List[List[int]]],
            distance,
            return_at_most_one_ranking: bool = False) -> List[List[List[int]]]:
        """
        :param rankings: A set of rankings
        :type rankings: list
        :param distance: The distance to use/consider
        :type distance: Distance
        :param return_at_most_one_ranking: the algorithm should not return more than one ranking
            (not read here: exactly one consensus is always returned)
        :type return_at_most_one_ranking: bool
        :return a list made of a single consensus in which every bucket holds exactly one element
        :raise DistanceNotHandledException when the algorithm cannot compute the consensus following the distance
        given as parameter
        """
        scheme = asarray(distance.scoring_scheme)

        # Only the first recognised scheme works on unified rankings; the two
        # other recognised schemes use the rankings as given.
        if array_equal(scheme, array([[0, 1, 1, 0, 1, 1], [1, 1, 0, 1, 1, 0]])):
            rankings_to_use = Unification.rankings_to_rankings(rankings)
        elif array_equal(scheme, array([[0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1]])) \
                or array_equal(scheme, array([[0, 1, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0]])):
            rankings_to_use = rankings
        else:
            raise DistanceNotHandledException

        nb_rankings = len(rankings_to_use)
        shuffled = list(rankings_to_use)
        # The rankings are considered in a random order.
        shuffle(shuffled)

        # element -> bucket index of that element in each shuffled ranking,
        # -1 where the element is absent from the ranking.
        bucket_ids = {}
        for id_ranking, ranking in enumerate(shuffled):
            for id_bucket, bucket in enumerate(ranking):
                for element in bucket:
                    if element not in bucket_ids:
                        bucket_ids[element] = zeros(nb_rankings, dtype=int) - 1
                    bucket_ids[element][id_ranking] = id_bucket

        ordered = sorted(bucket_ids.items(),
                         key=cmp_to_key(RepeatChoice.__compare))
        return [[[element] for element, _ in ordered]]
Exemple #5
0
    def __departure_rankings(self, rankings: List[List[List[int]]], positions: ndarray, elements_id: Dict, distance) \
            -> Tuple[ndarray, ndarray]:
        """
        Build the starting ("departure") rankings and the distance of each of them to ``rankings``.

        :param rankings: the input rankings, each one a list of buckets
        :param positions: bucket positions; it is transposed and then indexed by ranking, so it is presumably shaped
            (number of elements, number of rankings) with -1 for missing elements -- TODO confirm against the caller
        :param elements_id: mapping from element to its column index in the returned matrix
        :param distance: object providing ``id_order``; also handed to ``KendallTauGeneralizedNlogN`` and to the
            starting algorithms
        :return: a pair ``(departure, distances)``: ``departure`` is an int32 matrix with one row per starting ranking
            and one column per element, ``distances`` is a float64 array with one value per row
        """

        dst_id = distance.id_order
        dst_ini = []
        rankings_unified = Unification.rankings_to_rankings(rankings)
        kem_comp = KendallTauGeneralizedNlogN(distance)
        if len(self.starting_algorithms) == 0:
            # No starting algorithm: start from the distinct input rankings plus one extra row.
            # array() builds a new array, so the writes below do not touch the caller's `positions`.
            real_pos = array(positions).transpose()
            distinct_rankings = set()
            list_distinct_id_rankings = []

            i = 0
            for ranking in rankings_unified:
                # Row i of real_pos is modified in place: every -1 becomes (largest position of the row) + 1.
                ranking_array = real_pos[i]
                ranking_array[ranking_array == -1] = amax(ranking_array) + 1
                # The textual form of the row is used as the key to detect duplicated rankings.
                string_ranking = str(ranking_array)
                if string_ranking not in distinct_rankings:
                    distinct_rankings.add(string_ranking)
                    list_distinct_id_rankings.append(i)

                    # Distance between the unified ranking and the original (non unified) rankings.
                    dst_ini.append(
                        kem_comp.get_distance_to_a_set_of_rankings(
                            ranking, rankings)[dst_id])

                i += 1

            # Extra starting point: a single bucket holding every key of elements_id.
            dst_ini.append(
                kem_comp.get_distance_to_a_set_of_rankings([[*elements_id]],
                                                           rankings)[dst_id])
            # One row per distinct ranking, plus a last row left at zero for the single-bucket ranking.
            departure = zeros(
                (len(list_distinct_id_rankings) + 1, len(elements_id)),
                dtype=int32)
            departure[:-1] = real_pos[asarray(list_distinct_id_rankings)]
        else:
            # One starting ranking per starting algorithm; entries stay at -1 for elements absent from a consensus.
            m = len(self.starting_algorithms)
            n = len(elements_id)
            departure = zeros((m, n), dtype=int32) - 1
            id_ranking = 0
            for algo in self.starting_algorithms:
                # Only the first consensus returned by the algorithm is kept.
                cons = algo.compute_median_rankings(rankings_unified, distance,
                                                    True)[0]
                dst_ini.append(
                    kem_comp.get_distance_to_a_set_of_rankings(
                        cons, rankings)[distance.id_order])
                id_bucket = 0
                for bucket in cons:
                    for element in bucket:
                        departure[id_ranking][elements_id.get(
                            element)] = id_bucket
                    id_bucket += 1
                id_ranking += 1

        return departure, array(dst_ini, dtype=float64)
    def test_unification(self):
        """
        Check the rankings returned by ``Unification.dataset_to_rankings`` for
        the fixtures ``dataset0`` .. ``dataset4``.
        """
        # Expected rankings, indexed by fixture number.
        expected_rankings = (
            [[], [], []],
            [[[1], [2], [3, 4, 5], [6]], [[1], [2], [3, 4, 5], [6]]],
            [[[1], [2], [3, 4]], [[3], [4], [1, 2]], [[1], [2, 3, 4]]],
            [[[1], [2, 3, 4], [5], [7, 8, 9]], [[3, 5, 7], [8], [1, 2, 4, 9]],
             [[3, 2, 4, 5, 7, 9], [1, 8]]],
            [[[1], [2], [3, 4]], [[3], [4], [1, 2]],
             [[1], [2, 3, 4]], [[1, 2, 3, 4]]],
        )

        for idx, expected in enumerate(expected_rankings):
            dataset = getattr(self, "dataset%d" % idx)
            self.assertEqual(Unification.dataset_to_rankings(dataset),
                             expected)
 def __get_positions(rankings: List[List[List[int]]],
                     elements_id: Dict) -> ndarray:
     """
     Build the matrix of bucket positions of the unified rankings.

     :param rankings: the rankings, each one a list of buckets
     :param elements_id: mapping from element to its row index in the matrix
     :return: an int32 matrix with one row per entry of ``elements_id`` and
         one column per ranking, holding the index of the bucket containing
         the element in the unified ranking (cells never written stay at -1)
     """
     nb_rankings = len(rankings)
     nb_elements = len(elements_id)
     unified = Unification.rankings_to_rankings(rankings)
     positions = zeros((nb_elements, nb_rankings), dtype=int32) - 1
     for column, ranking in enumerate(unified):
         for bucket_index, bucket in enumerate(ranking):
             for element in bucket:
                 row = positions[elements_id.get(element)]
                 row[column] = bucket_index
     return positions
    def test_completude(self):
        """
        Check ``is_complete`` on the fixtures ``dataset0`` .. ``dataset6``.

        Fixtures 0, 1 and 6 are expected to be complete and fixtures 2 to 5
        incomplete. Every dataset returned by ``Projection.dataset_to_dataset``
        or ``Unification.dataset_to_dataset`` is expected to be complete, and
        the fixtures must report the same values afterwards.
        """
        complete_ids = (0, 1, 6)
        incomplete_ids = (2, 3, 4, 5)

        def fixture(idx):
            return getattr(self, "dataset%d" % idx)

        def check_fixtures():
            for idx in complete_ids:
                self.assertTrue(fixture(idx).is_complete)
            for idx in incomplete_ids:
                self.assertFalse(fixture(idx).is_complete)

        check_fixtures()

        for normalization in (Projection, Unification):
            for idx in range(7):
                self.assertTrue(
                    normalization.dataset_to_dataset(fixture(idx)).is_complete)

        # Check integrity of original datasets
        check_fixtures()
 def __departure_rankings(self, rankings: List[List[List[int]]],
                          elements_id: Dict, distance) -> ndarray:
     """
     Build the matrix of starting rankings, one column per starting ranking.

     :param rankings: the input rankings, each one a list of buckets
     :param elements_id: mapping from element to its row index in the matrix
     :param distance: forwarded to the starting algorithms
     :return: an int matrix with one row per entry of ``elements_id``; each
         cell holds the index of the bucket containing the element in the
         corresponding starting ranking, or -1 when the cell was never written
     """

     def fill_column(column, ranking):
         # Write, for each element of `ranking`, the index of its bucket.
         for bucket_index, bucket in enumerate(ranking):
             for element in bucket:
                 departure[elements_id.get(element)][column] = bucket_index

     if len(self.starting_algorithms) == 0:
         # No starting algorithm: one column per input ranking, plus a last
         # column set to zero for every element.
         nb_columns = len(rankings)
         nb_rows = len(elements_id)
         departure = zeros((nb_rows, nb_columns + 1), dtype=int) - 1
         for column, ranking in enumerate(rankings):
             fill_column(column, ranking)
         departure[:, -1] = zeros(nb_rows)
         return departure

     # Otherwise: one column per starting algorithm.
     nb_columns = len(self.starting_algorithms)
     nb_rows = len(elements_id)
     departure = zeros((nb_rows, nb_columns), dtype=int) - 1
     for column, algo in enumerate(self.starting_algorithms):
         try:
             consensus = algo.compute_median_rankings(
                 rankings, distance, True)[0]
         except DistanceNotHandledException:
             # Retry on the unified rankings when the algorithm rejected the
             # distance on the rankings as given.
             consensus = algo.compute_median_rankings(
                 Unification.rankings_to_rankings(rankings), distance,
                 True)[0]
         fill_column(column, consensus)
     return departure
Exemple #10
0
    def compute_median_rankings(
            self,
            rankings: List[List[List[int]]],
            distance,
            return_at_most_one_ranking: bool = False) -> List[List[List[int]]]:
        """
        :param rankings: A set of rankings
        :type rankings: list
        :param distance: The distance to use/consider; when None, the matrix of a default ScoringScheme is used
        :type distance: Distance
        :param return_at_most_one_ranking: the algorithm should not return more than one ranking
            (not read here: exactly one consensus is always returned)
        :type return_at_most_one_ranking: bool
        :return a list made of a single consensus: the elements sorted by increasing average position, elements
        with the same average sharing a bucket
        :raise DistanceNotHandledException when the algorithm cannot compute the consensus following the distance
        given as parameter
        """
        if distance is None:
            scheme = ScoringScheme().matrix
        else:
            scheme = asarray(distance.scoring_scheme)

        # Only the first recognised scheme works on unified rankings; the two
        # other recognised schemes use the rankings as given.
        if array_equal(scheme,
                       array([[0, 1, 1, 0, 1, 1], [1, 1, 0, 1, 1, 0]])):
            rankings_to_use = Unification.rankings_to_rankings(rankings)
        elif array_equal(scheme,
                         array([[0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1]])) \
                or array_equal(scheme,
                               array([[0, 1, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0]])):
            rankings_to_use = rankings
        else:
            raise DistanceNotHandledException

        # Per element: sum of its positions, and number of rankings containing it.
        position_sum = {}
        occurrences = {}
        for ranking in rankings_to_use:
            position = 1
            for bucket in ranking:
                for elem in bucket:
                    position_sum[elem] = position_sum.get(elem, 0) + position
                    occurrences[elem] = occurrences.get(elem, 0) + 1
                # The next bucket is either one step further, or as many steps
                # further as there are elements in the current bucket.
                if self.useBucketIdAndNotBucketSize:
                    position += 1
                else:
                    position += len(bucket)

        averages = [(elem, float(position_sum[elem]) / occurrences[elem])
                    for elem in position_sum]
        averages.sort(key=lambda pair: pair[1])

        # Consecutive elements with the same average end up in the same bucket.
        consensus = []
        current_bucket = []
        previous_average = -1
        for elem, average in averages:
            if average != previous_average:
                previous_average = average
                current_bucket = []
                consensus.append(current_bucket)
            current_bucket.append(elem)
        return [consensus]
    def compute_median_rankings(
            self,
            rankings: List[List[List[int]]],
            distance,
            return_at_most_one_ranking: bool = False) -> List[List[List[int]]]:
        """
        :param rankings: A set of rankings
        :type rankings: list
        :param distance: The distance to use/consider
        :type distance: Distance
        :param return_at_most_one_ranking: the algorithm should not return more than one ranking
            (not read here: exactly one consensus is always returned)
        :type return_at_most_one_ranking: bool
        :return a list made of a single consensus; the consensus is empty when no element could be placed
        :raise DistanceNotHandledException when the algorithm cannot compute the consensus following the distance
        given as parameter
        """
        scheme = asarray(distance.scoring_scheme)

        # Only the first recognised scheme works on unified rankings; the two
        # other recognised schemes use the rankings as given.
        if array_equal(scheme,
                       array([[0, 1, 1, 0, 1, 1], [1, 1, 0, 1, 1, 0]])):
            rankings_to_use = Unification.rankings_to_rankings(rankings)
        elif array_equal(scheme,
                         array([[0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1]])) \
                or array_equal(scheme,
                               array([[0, 1, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0]])):
            rankings_to_use = rankings
        else:
            raise DistanceNotHandledException

        # Threshold of each element: self.h times its number of occurrences
        # in the rankings.
        threshold = {}
        for ranking in rankings_to_use:
            for bucket in ranking:
                for element in bucket:
                    threshold[element] = threshold.get(element, 0) + self.h

        times_seen = {}
        placed = set()
        consensus = []

        # Read the rankings level by level: the first buckets of all rankings,
        # then the second ones, and so on; exhausted rankings yield None.
        for level in zip_longest(*rankings_to_use):
            new_bucket = []
            for bucket in level:
                if bucket is None:
                    continue
                for element in bucket:
                    if element in placed:
                        continue
                    times_seen[element] = times_seen.get(element, 0) + 1
                    # An element is placed once it has been seen often enough.
                    if times_seen[element] >= threshold[element]:
                        new_bucket.append(element)
                        placed.add(element)
            if new_bucket:
                consensus.append(new_bucket)

        return [consensus]
Exemple #12
0
def execute_median_rankings_computation_from_rankings(
    rankings,
    algorithm,
    normalization,
    distance,
    precise_time_measurement,
    dataset=None,
    algorithms=None,
):
    """
    Compute a consensus of ``rankings`` with ``algorithm`` and time the computation.

    :param rankings: the rankings to aggregate
    :param algorithm: the algorithm to run; ignored when ``algorithms`` is not empty
    :param normalization: its ``str()`` selects the normalization applied to the
        rankings ("Unification", "Projection", anything else: none)
    :param distance: passed to the algorithm and used to score the consensus
    :param precise_time_measurement: when true, the computation is repeated until
        a single timed batch lasts at least ``MIN_MEASURE_DURATION`` seconds
    :param dataset: optional dataset described in the result; when None a
        placeholder with id -1 is reported instead
    :param algorithms: optional collection of algorithms; when not empty the
        function returns a list holding one result per algorithm
    :return: a dict with the keys ``dataset``, ``consensus``, ``distance``,
        ``duration`` and ``algo`` (or a list of such dicts, see ``algorithms``).
        ``distance`` is measured between the first consensus and the rankings
        as given, before normalization. ``duration`` is the average time of
        one computation in milliseconds, truncated to a whole number of
        nanoseconds.
    """
    if str(normalization) == "Unification":
        rankings_real = Unification.rankings_to_rankings(rankings)
    elif str(normalization) == "Projection":
        rankings_real = Projection.rankings_to_rankings(rankings)
    else:
        rankings_real = rankings
    if algorithms:
        return [
            execute_median_rankings_computation_from_rankings(
                rankings=rankings_real,
                algorithm=a,
                normalization=normalization,
                distance=distance,
                precise_time_measurement=precise_time_measurement,
                dataset=dataset,
            ) for a in algorithms
        ]

    # First run: provides the consensus and a first estimate of the duration.
    iteration = 1
    start_timezone = timezone.now()
    c = algorithm.compute_median_rankings(rankings=rankings_real,
                                          distance=distance)
    duration = (timezone.now() - start_timezone).total_seconds()
    while precise_time_measurement and duration < MIN_MEASURE_DURATION:
        if duration > 0:
            # Number of runs expected to last slightly more than the minimum.
            estimate = int(
                (iteration / duration) * MIN_MEASURE_DURATION * 1.1)
        else:
            # The clock did not advance: no rate can be derived, grow blindly.
            estimate = iteration * 10
        # Always grow, otherwise a truncated estimate could repeat the same
        # too-short batch forever.
        iteration = max(iteration + 1, estimate)
        start_timezone = timezone.now()
        # Run exactly `iteration` computations, since the measured duration is
        # divided by `iteration` below.
        for _ in range(iteration):
            algorithm.compute_median_rankings(rankings=rankings_real,
                                              distance=distance)
        duration = (timezone.now() - start_timezone).total_seconds()

    return dict(
        dataset=dict(
            id=-1,
            name=ugettext('typed'),
        ) if dataset is None else dict(
            id=dataset.id,
            name=str(dataset),
        ),
        consensus=c,
        distance=KendallTauGeneralizedNlogN(
            distance).get_distance_to_a_set_of_rankings(
                c[0],
                rankings=rankings,
            )[distance.id_order],
        duration=(int(duration / iteration * 1000.0 * 1000.0 * 1000.0)) /
        1000.0 / 1000.0,
        algo=dict(
            id=algorithm.get_full_name(),
            name=str(get_name_from(algorithm.get_full_name())),
        ),
    )