def test_n(self):
    """n (number of elements) of each dataset: unchanged by Unification,
    possibly reduced by Projection (which keeps only shared elements)."""
    expected_raw = [0, 6, 4, 8, 4, 4, 3, 0]
    expected_projection = [0, 6, 0, 2, 0, 0, 3, 0]
    datasets = [getattr(self, "dataset%d" % i) for i in range(8)]
    for dataset, expected in zip(datasets, expected_raw):
        self.assertEqual(dataset.n, expected)
    for dataset, expected in zip(datasets, expected_raw):
        self.assertEqual(Unification.dataset_to_dataset(dataset).n, expected)
    for dataset, expected in zip(datasets, expected_projection):
        self.assertEqual(Projection.dataset_to_dataset(dataset).n, expected)
def test_m(self):
    """m (number of rankings) of each dataset is preserved by both the
    Unification and the Projection normalizations."""
    expected = [3, 2, 3, 3, 4, 4, 3, 0]
    datasets = [getattr(self, "dataset%d" % i) for i in range(8)]
    for dataset, m in zip(datasets, expected):
        self.assertEqual(dataset.m, m)
    for dataset, m in zip(datasets, expected):
        self.assertEqual(Unification.dataset_to_dataset(dataset).m, m)
    for dataset, m in zip(datasets, expected):
        self.assertEqual(Projection.dataset_to_dataset(dataset).m, m)
def execute_median_rankings_computation_from_datasets(
        datasets,
        algorithm,
        normalization,
        distance,
        precise_time_measurement,
        algorithms=None,
):
    """
    Run the given algorithm(s) on every dataset and collect one result dict
    per (dataset, algorithm) pair.

    :param datasets: iterable of datasets; each exposes `complete` and
        `rankings`
    :param algorithm: single algorithm to run (may be None when `algorithms`
        is supplied)
    :param normalization: "Unification", "Projection" or anything else (no
        normalization); only applied to datasets that are not complete
    :param distance: the distance forwarded to the per-rankings computation
    :param precise_time_measurement: forwarded to the per-rankings computation
    :param algorithms: optional extra list of algorithms to run
    :return: flat list of result dicts
    """
    submission_results = []
    # Copy the caller's list: the original code appended `algorithm`
    # directly into it, mutating the argument as a side effect.
    algorithms = list(algorithms) if algorithms else []
    if algorithm is not None:
        algorithms.append(algorithm)
    for d in datasets:
        if not d.complete:
            # Incomplete datasets are normalized first; complete ones are
            # used as-is.
            if str(normalization) == "Unification":
                rankings_real = Unification.rankings_to_rankings(d.rankings)
            elif str(normalization) == "Projection":
                rankings_real = Projection.rankings_to_rankings(d.rankings)
            else:
                rankings_real = d.rankings
        else:
            rankings_real = d.rankings
        for a in algorithms:
            submission_results.append(
                execute_median_rankings_computation_from_rankings(
                    rankings=rankings_real,
                    algorithm=a,
                    normalization=normalization,
                    distance=distance,
                    precise_time_measurement=precise_time_measurement,
                    dataset=d,
                ))
    return submission_results
def compute_median_rankings(
        self,
        rankings: List[List[List[int]]],
        distance,
        return_at_most_one_ranking: bool = False) -> List[List[List[int]]]:
    """
    Pick a consensus by ordering elements with the private comparator
    (RepeatChoice.__compare) applied to their bucket positions across a
    randomly shuffled copy of the rankings.

    :param rankings: A set of rankings
    :type rankings: list
    :param distance: The distance to use/consider
    :type distance: Distance
    :param return_at_most_one_ranking: the algorithm should not return more
    than one ranking
    :type return_at_most_one_ranking: bool
    :return one or more consensus if the underlying algorithm can find
    multiple solution as good as each other. If the algorithm is not able to
    provide multiple consensus, or if return_at_most_one_ranking is True
    then, it should return a list made of the only / the first consensus
    found
    :raise DistanceNotHandledException when the algorithm cannot compute the
    consensus following the distance given as parameter
    """
    # Map the scoring scheme onto one of the three supported distances;
    # anything else is rejected.
    scoring_scheme = asarray(distance.scoring_scheme)
    if array_equal(scoring_scheme,
                   array([[0, 1, 1, 0, 1, 1], [1, 1, 0, 1, 1, 0]])):
        dst = 0
    elif array_equal(scoring_scheme,
                     array([[0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1]])):
        dst = 1
    elif array_equal(scoring_scheme,
                     array([[0, 1, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0]])):
        dst = 2
    else:
        raise DistanceNotHandledException
    # Distance 0 requires complete rankings: unify them first.
    if dst == 0:
        rankings_to_use = Unification.rankings_to_rankings(rankings)
    else:
        rankings_to_use = rankings
    nb_rankings = len(rankings_to_use)
    # Shuffle a copy (the input list itself is left untouched) so the
    # comparator below sees the rankings in a random order.
    rankings_copy = list(rankings_to_use)
    shuffle(rankings_copy)
    # h maps each element to an int array: its bucket index in each
    # (shuffled) ranking, -1 where the element is absent.
    h = {}
    id_ranking = 0
    for ranking in rankings_copy:
        id_bucket = 0
        for bucket in ranking:
            for element in bucket:
                if element not in h:
                    h[element] = zeros(nb_rankings, dtype=int) - 1
                h[element][id_ranking] = id_bucket
            id_bucket += 1
        id_ranking += 1
    # The consensus is a strict order: one singleton bucket per element,
    # in comparator order.
    res = []
    for el in sorted(h.items(), key=cmp_to_key(RepeatChoice.__compare)):
        res.append([el[0]])
    # kem = KemenyComputingFactory(scoring_scheme=self.scoring_scheme)
    # kem = KendallTauGeneralizedNlogN()
    return [res]
def __departure_rankings(self, rankings: List[List[List[int]]],
                         positions: ndarray, elements_id: Dict,
                         distance) \
        -> Tuple[ndarray, ndarray]:
    """
    Build the departure (starting-point) consensus candidates for the
    heuristic, together with each candidate's distance to the input
    rankings.

    :param rankings: the input rankings
    :param positions: matrix where positions[element_row][ranking_col] is
        the bucket index of the element in that ranking, -1 when absent
    :param elements_id: maps each element to its row index in `positions`
    :param distance: its `id_order` selects which component of the computed
        distance vector is kept
    :return: (departure, dst_ini): one encoded candidate per row of
        `departure`, with its distance in the matching entry of `dst_ini`
    """
    dst_id = distance.id_order
    dst_ini = []
    rankings_unified = Unification.rankings_to_rankings(rankings)
    kem_comp = KendallTauGeneralizedNlogN(distance)
    if len(self.starting_algorithms) == 0:
        # No starting algorithms: candidates are the distinct input
        # rankings, plus one extra candidate tying all elements.
        real_pos = array(positions).transpose()
        distinct_rankings = set()
        list_distinct_id_rankings = []
        i = 0
        for ranking in rankings_unified:
            ranking_array = real_pos[i]
            # Map missing elements (-1) after the last bucket; note this
            # mutates the row of real_pos in place, which is exactly what
            # `departure` copies below.
            ranking_array[ranking_array == -1] = amax(ranking_array) + 1
            # str() of the row acts as a dedup fingerprint of the ranking.
            string_ranking = str(ranking_array)
            if string_ranking not in distinct_rankings:
                distinct_rankings.add(string_ranking)
                list_distinct_id_rankings.append(i)
                dst_ini.append(
                    kem_comp.get_distance_to_a_set_of_rankings(
                        ranking, rankings)[dst_id])
            i += 1
        # Last candidate: all elements in one bucket ([[*elements_id]] is
        # the single-bucket ranking of every known element).
        dst_ini.append(
            kem_comp.get_distance_to_a_set_of_rankings([[*elements_id]],
                                                       rankings)[dst_id])
        departure = zeros(
            (len(list_distinct_id_rankings) + 1, len(elements_id)),
            dtype=int32)
        departure[:-1] = real_pos[asarray(list_distinct_id_rankings)]
    else:
        # One candidate per starting algorithm: its (single) consensus on
        # the unified rankings, encoded as element -> bucket index.
        m = len(self.starting_algorithms)
        n = len(elements_id)
        departure = zeros((m, n), dtype=int32) - 1
        id_ranking = 0
        for algo in self.starting_algorithms:
            cons = algo.compute_median_rankings(rankings_unified, distance,
                                                True)[0]
            dst_ini.append(
                kem_comp.get_distance_to_a_set_of_rankings(
                    cons, rankings)[distance.id_order])
            id_bucket = 0
            for bucket in cons:
                for element in bucket:
                    departure[id_ranking][elements_id.get(
                        element)] = id_bucket
                id_bucket += 1
            id_ranking += 1
    return departure, array(dst_ini, dtype=float64)
def test_unification(self):
    """Unification appends the elements missing from each ranking as one
    trailing bucket, so every unified ranking covers the full element set."""
    cases = [
        (self.dataset0, [[], [], []]),
        (self.dataset1,
         [[[1], [2], [3, 4, 5], [6]], [[1], [2], [3, 4, 5], [6]]]),
        (self.dataset2,
         [[[1], [2], [3, 4]], [[3], [4], [1, 2]], [[1], [2, 3, 4]]]),
        (self.dataset3,
         [[[1], [2, 3, 4], [5], [7, 8, 9]], [[3, 5, 7], [8], [1, 2, 4, 9]],
          [[3, 2, 4, 5, 7, 9], [1, 8]]]),
        (self.dataset4,
         [[[1], [2], [3, 4]], [[3], [4], [1, 2]], [[1], [2, 3, 4]],
          [[1, 2, 3, 4]]]),
    ]
    for dataset, expected in cases:
        self.assertEqual(Unification.dataset_to_rankings(dataset), expected)
def __get_positions(rankings: List[List[List[int]]],
                    elements_id: Dict) -> ndarray:
    """Build the n x m position matrix of the (unified) rankings: entry
    [element_row][ranking_col] is the bucket index of the element in that
    ranking. Cells are pre-filled with -1 before unification overwrites
    every one of them."""
    nb_rankings = len(rankings)
    nb_elements = len(elements_id)
    positions = zeros((nb_elements, nb_rankings), dtype=int32) - 1
    unified = Unification.rankings_to_rankings(rankings)
    for col, ranking in enumerate(unified):
        for bucket_index, bucket in enumerate(ranking):
            for element in bucket:
                positions[elements_id.get(element)][col] = bucket_index
    return positions
def test_completude(self):
    """dataset0/1/6 are complete, dataset2-5 are not; both normalizations
    always produce complete datasets and must not mutate the originals."""
    complete = [self.dataset0, self.dataset1, self.dataset6]
    incomplete = [self.dataset2, self.dataset3, self.dataset4, self.dataset5]

    def check_originals():
        # Completeness flags of the raw datasets.
        for dataset in complete:
            self.assertTrue(dataset.is_complete)
        for dataset in incomplete:
            self.assertFalse(dataset.is_complete)

    check_originals()
    every = [self.dataset0, self.dataset1, self.dataset2, self.dataset3,
             self.dataset4, self.dataset5, self.dataset6]
    for normalization in (Projection, Unification):
        for dataset in every:
            self.assertTrue(
                normalization.dataset_to_dataset(dataset).is_complete)
    # Check integrity of original datasets after normalization.
    check_originals()
def __departure_rankings(self, rankings: List[List[List[int]]],
                         elements_id: Dict, distance) -> ndarray:
    """
    Build the matrix of departure (starting-point) consensus candidates,
    encoded column-wise: departure[element_row][candidate_col] is the bucket
    index of the element in that candidate, -1 where the element is absent.

    :param rankings: the input rankings
    :param elements_id: maps each element to its row index in the matrix
    :param distance: forwarded to the starting algorithms, if any
    :return: the n x (m+1) or n x m candidate matrix (see branches below)
    """
    if len(self.starting_algorithms) == 0:
        # Candidates = the input rankings themselves, plus a last column
        # where every element is tied in bucket 0.
        m = len(rankings)
        n = len(elements_id)
        departure = zeros((n, m + 1), dtype=int) - 1
        id_ranking = 0
        for ranking in rankings:
            id_bucket = 0
            for bucket in ranking:
                for element in bucket:
                    departure[elements_id.get(
                        element)][id_ranking] = id_bucket
                id_bucket += 1
            id_ranking += 1
        departure[:, -1] = zeros(n)
    else:
        # One candidate per starting algorithm: its first consensus.
        m = len(self.starting_algorithms)
        n = len(elements_id)
        departure = zeros((n, m), dtype=int) - 1
        id_ranking = 0
        for algo in self.starting_algorithms:
            try:
                cons = algo.compute_median_rankings(
                    rankings, distance, True)[0]
            except DistanceNotHandledException:
                # Fall back on unified (complete) rankings when the
                # algorithm cannot handle the distance on the raw ones.
                cons = algo.compute_median_rankings(
                    Unification.rankings_to_rankings(rankings), distance,
                    True)[0]
            id_bucket = 0
            for bucket in cons:
                for element in bucket:
                    departure[elements_id.get(
                        element)][id_ranking] = id_bucket
                id_bucket += 1
            id_ranking += 1
    return departure
def compute_median_rankings(
        self,
        rankings: List[List[List[int]]],
        distance,
        return_at_most_one_ranking: bool = False) -> List[List[List[int]]]:
    """
    Build a consensus by sorting elements on their mean bucket position
    across the rankings (Borda-style); elements with equal means share a
    bucket.

    :param rankings: A set of rankings
    :type rankings: list
    :param distance: The distance to use/consider (None falls back on the
    default ScoringScheme)
    :type distance: Distance
    :param return_at_most_one_ranking: the algorithm should not return more
    than one ranking
    :type return_at_most_one_ranking: bool
    :return a list containing the single consensus found
    :raise DistanceNotHandledException when the algorithm cannot compute the
    consensus following the distance given as parameter
    """
    # Resolve the scoring scheme, defaulting when no distance is supplied.
    if distance is None:
        scheme = ScoringScheme().matrix
    else:
        scheme = asarray(distance.scoring_scheme)
    # Only three schemes are supported; map each to a distance id.
    if array_equal(scheme, array([[0, 1, 1, 0, 1, 1],
                                  [1, 1, 0, 1, 1, 0]])):
        dst = 0
    elif array_equal(scheme, array([[0, 1, 1, 1, 1, 1],
                                    [1, 1, 0, 1, 1, 1]])):
        dst = 1
    elif array_equal(scheme, array([[0, 1, 1, 0, 0, 0],
                                    [1, 1, 0, 0, 0, 0]])):
        dst = 2
    else:
        raise DistanceNotHandledException
    # Distance 0 requires complete rankings: unify them first.
    if dst == 0:
        rankings_to_use = Unification.rankings_to_rankings(rankings)
    else:
        rankings_to_use = rankings
    # totals[elem] = (sum of positions, number of appearances).
    totals = {}
    for ranking in rankings_to_use:
        position = 1
        for bucket in ranking:
            for elem in bucket:
                sum_pos, count = totals.get(elem, (0, 0))
                totals[elem] = (sum_pos + position, count + 1)
            # Either each bucket advances the position by one, or by its
            # size, depending on the configured flag.
            if self.useBucketIdAndNotBucketSize:
                position += 1
            else:
                position += len(bucket)
    # Sort by mean position (stable, so insertion order breaks ties).
    scored = sorted(
        ((elem, s * 1.0 / c) for elem, (s, c) in totals.items()),
        key=lambda pair: pair[1])
    # Group consecutive equal means into a shared bucket.
    consensus = []
    previous_score = None
    for elem, score in scored:
        if not consensus or score != previous_score:
            consensus.append([])
            previous_score = score
        consensus[-1].append(elem)
    return [consensus]
def compute_median_rankings(
        self,
        rankings: List[List[List[int]]],
        distance,
        return_at_most_one_ranking: bool = False) -> List[List[List[int]]]:
    """
    MedRank-style consensus: scan the rankings level by level and output an
    element once it has appeared in at least a fraction self.h of the
    rankings that contain it.

    :param rankings: A set of rankings
    :type rankings: list
    :param distance: The distance to use/consider
    :type distance: Distance
    :param return_at_most_one_ranking: the algorithm should not return more
    than one ranking
    :type return_at_most_one_ranking: bool
    :return one or more consensus if the underlying algorithm can find
    multiple solution as good as each other. If the algorithm is not able to
    provide multiple consensus, or if return_at_most_one_ranking is True
    then, it should return a list made of the only / the first consensus
    found
    :raise DistanceNotHandledException when the algorithm cannot compute the
    consensus following the distance given as parameter
    """
    # Map the scoring scheme onto one of the three supported distances;
    # anything else is rejected.
    scoring_scheme = asarray(distance.scoring_scheme)
    if array_equal(scoring_scheme,
                   array([[0, 1, 1, 0, 1, 1], [1, 1, 0, 1, 1, 0]])):
        dst = 0
    elif array_equal(scoring_scheme,
                     array([[0, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1]])):
        dst = 1
    elif array_equal(scoring_scheme,
                     array([[0, 1, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0]])):
        dst = 2
    else:
        raise DistanceNotHandledException
    # Distance 0 requires complete rankings: unify them first.
    if dst == 0:
        rankings_to_use = Unification.rankings_to_rankings(rankings)
    else:
        rankings_to_use = rankings
    # has[e]: appearances of e seen so far during the level-by-level scan.
    has = {}
    # nb_rankings_needed[e] ends up as self.h * (number of rankings that
    # contain e): the appearance threshold before e is output.
    nb_rankings_needed = {}
    already_put = set()
    for ranking in rankings_to_use:
        for bucket in ranking:
            for element in bucket:
                if element not in nb_rankings_needed:
                    nb_rankings_needed[element] = self.h
                else:
                    nb_rankings_needed[element] += self.h
    bucket_res = []
    ranking_res = []
    # Walk the rankings level by level (bucket 1 of every ranking, then
    # bucket 2, ...); zip_longest pads the shorter rankings with None.
    for reorganized in zip_longest(*rankings_to_use):
        for bucket in reorganized:
            if bucket is not None:
                for element in bucket:
                    if element not in already_put:
                        if element not in has:
                            has[element] = 1
                            if nb_rankings_needed[element] <= 1:
                                bucket_res.append(element)
                                already_put.add(element)
                        else:
                            has[element] += 1
                            if has[element] >= nb_rankings_needed[element]:
                                bucket_res.append(element)
                                already_put.add(element)
        # NOTE(review): flush point inferred from the flattened source —
        # one consensus bucket per depth level; confirm against history.
        if len(bucket_res) > 0:
            ranking_res.append(bucket_res)
            bucket_res = []
    # Always return a non-empty list of rankings.
    return [ranking_res] if len(ranking_res) > 0 else [[]]
def execute_median_rankings_computation_from_rankings(
        rankings,
        algorithm,
        normalization,
        distance,
        precise_time_measurement,
        dataset=None,
        algorithms=None,
):
    """
    Run a median-ranking algorithm on `rankings` and return a result dict:
    consensus, its distance to the input rankings, the (averaged) duration
    in ms, and dataset / algorithm identification. When `algorithms` is
    given, run each of them instead and return a list of result dicts.

    :param rankings: the input rankings
    :param algorithm: the algorithm to run (ignored when `algorithms` given)
    :param normalization: "Unification", "Projection" or anything else (none)
    :param distance: the distance used by the algorithm and the evaluation
    :param precise_time_measurement: when True, repeat the computation until
        at least MIN_MEASURE_DURATION is spent, for a reliable duration
    :param dataset: optional dataset the rankings come from (for reporting)
    :param algorithms: optional list of algorithms to run instead
    """
    # Normalize the rankings once, up front.
    if str(normalization) == "Unification":
        rankings_real = Unification.rankings_to_rankings(rankings)
    elif str(normalization) == "Projection":
        rankings_real = Projection.rankings_to_rankings(rankings)
    else:
        rankings_real = rankings
    if algorithms:
        return [
            execute_median_rankings_computation_from_rankings(
                rankings=rankings_real,
                algorithm=a,
                normalization=normalization,
                distance=distance,
                precise_time_measurement=precise_time_measurement,
                dataset=dataset,
            ) for a in algorithms
        ]
    iteration = 1
    start_timezone = timezone.now()
    c = algorithm.compute_median_rankings(rankings=rankings_real,
                                          distance=distance)
    duration = (timezone.now() - start_timezone).total_seconds()
    while precise_time_measurement and duration < MIN_MEASURE_DURATION:
        # Scale the batch size so the next batch lasts a bit longer than
        # MIN_MEASURE_DURATION. Guard against duration == 0.0 (possible on
        # coarse clocks): the original divided by it unconditionally, and
        # could also produce an empty batch (range(2, iteration)) whose
        # zero duration re-entered the division.
        if duration > 0.0:
            iteration = max(
                int((iteration / duration) * MIN_MEASURE_DURATION * 1.1),
                iteration + 1)
        else:
            iteration *= 10
        start_timezone = timezone.now()
        for _ in range(iteration):
            algorithm.compute_median_rankings(rankings=rankings_real,
                                              distance=distance)
        duration = (timezone.now() - start_timezone).total_seconds()
        # `duration` now covers exactly `iteration` runs, so the average
        # below is correct (the original timed iteration - 2 runs but still
        # divided by iteration, underestimating the duration).
    return dict(
        dataset=dict(
            id=-1,
            name=ugettext('typed'),
        ) if dataset is None else dict(
            id=dataset.id,
            name=str(dataset),
        ),
        consensus=c,
        distance=KendallTauGeneralizedNlogN(
            distance).get_distance_to_a_set_of_rankings(
                c[0],
                rankings=rankings,
            )[distance.id_order],
        # Truncate to microsecond precision, expressed in milliseconds.
        duration=(int(duration / iteration * 1000.0 * 1000.0 * 1000.0)) /
        1000.0 / 1000.0,
        algo=dict(
            id=algorithm.get_full_name(),
            name=str(get_name_from(algorithm.get_full_name())),
        ),
    )