def test_ranking_MV(dataset='HDI'):
    """Test that PIIMV computes the same ranking as PII when no value is missing."""
    data_set = 'data/' + dataset + '/raw.csv'
    alts, weights = dr.open_raw(data_set)[0][0:5], dr.open_raw(data_set)[1]
    # print(alts)
    # print(weights)
    if weights == []:
        weights = None
    if dataset == 'HDI':
        weights = [0.5, 0.5]
        ceils = [3, 3]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
        prometheeMV = prom.PrometheeMV(alts, weights=weights, ceils=ceils)
    else:
        seed = 1
        promethee = prom.PrometheeII(alts, weights=weights, seed=seed)
        prometheeMV = prom.PrometheeMV(alts, weights=weights, seed=seed)
    # print(promethee.ceils, promethee.weights)

    scores = promethee.scores
    scoresMV = prometheeMV.scores
    rank = promethee.ranking
    rankMV = prometheeMV.ranking

    # Print each alternative with its own score in both rankings
    for i in range(len(rank)):
        print(str(rank[i] + 1) + '::' + str(scores[rank[i]]) + " :::: "
              + str(rankMV[i] + 1) + '::' + str(scoresMV[rankMV[i]]))
def test_ranking(dataset='HDI'):
    """Test that PII computes the same ranking as in the article RobustPII.

    The following mapping between countries and indices applies:
        0 - Norway          10 - Singapore
        1 - Australia       11 - Hong Kong
        2 - Switzerland     12 - Liechtenstein
        3 - Denmark         13 - Sweden
        4 - Netherlands     14 - United Kingdom
        5 - Germany         15 - Iceland
        6 - Ireland         16 - Korea
        7 - United States   17 - Israel
        8 - Canada          18 - Luxembourg
        9 - New Zealand     19 - Japan

    The expected ranking is:
        2::0.31491228070175437
        1::0.2500000000000007
        8::0.18245614035087707
        11::0.18070175438596484
        19::0.16315789473684195
        17::0.16228070175438677
        9::0.059649122807016945
        13::0.058771929824561676
        0::0.04210526315789358
        5::0.007894736842106042
        14::-0.02543859649122777
        16::-0.02807017543859552
        10::-0.07105263157894759
        4::-0.08070175438596594
        18::-0.09824561403508743
        15::-0.13771929824561518
        6::-0.14999999999999925
        3::-0.17631578947368398
        7::-0.28859649122807074
        12::-0.3657894736842105
    """
    data_set = 'data/' + dataset + '/raw.csv'
    alts, weights = dr.open_raw(data_set)[0][0:20], dr.open_raw(data_set)[1]
    # print(alts)
    if weights == []:
        weights = None
    if dataset == 'HDI':
        weights = [0.5, 0.5]
        ceils = [3, 3]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
    else:
        seed = 1
        promethee = prom.PrometheeII(alts, weights=weights, seed=seed)

    print(promethee.ceils, promethee.weights)
    print(sum(promethee.weights))
    scores = promethee.scores
    rank = promethee.ranking
    # Print 0-based indices so the output matches the docstring mapping
    for i in range(len(rank)):
        print(str(rank[i]) + '::' + str(scores[rank[i]]))
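# A hedged sketch (not part of the original test suite): the expected order
# documented in the docstring above, transcribed so the check can be
# automated instead of eyeballed. 'EXPECTED_HDI_ORDER' is an assumed name,
# and we assume promethee.ranking lists 0-based alternative indices in rank
# order, as the print loop above suggests.
EXPECTED_HDI_ORDER = [2, 1, 8, 11, 19, 17, 9, 13, 0, 5,
                      14, 16, 10, 4, 18, 15, 6, 3, 7, 12]


def check_hdi_ranking(ranking):
    """Return True when a computed ranking matches the published order."""
    return list(ranking) == EXPECTED_HDI_ORDER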
def test_rr_analysis(data='HDI'):
    """Check that the rank reversals are correct.

    These rank reversals should be compared to the ones occurring in the
    article 'About the computation of robust PROMETHEE II rankings:
    empirical evidence' by De Smet. The following mapping between countries
    and indices applies for the HDI data set:
        0 - Norway          10 - Singapore
        1 - Australia       11 - Hong Kong
        2 - Switzerland     12 - Liechtenstein
        3 - Denmark         13 - Sweden
        4 - Netherlands     14 - United Kingdom
        5 - Germany         15 - Iceland
        6 - Ireland         16 - Korea
        7 - United States   17 - Israel
        8 - Canada          18 - Luxembourg
        9 - New Zealand     19 - Japan
    """
    # Data initialisation according to the data set
    if (data == 'HDI'):
        data_set = 'data/HDI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        ceils = [3, 3]
        weights = [0.5, 0.5]
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
    elif (data == 'SHA'):
        data_set = 'data/SHA/raw_20.csv'
        alts, weights, coeff, ceils = dr.open_raw(data_set)
        promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils)
    elif (data == 'EPI'):
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        seed = 0
        promethee = prom.PrometheeII(alts, seed=seed)

    # print("initial ranking :")
    # print(promethee.ranking)
    # print("initial scores :")
    # print(promethee.scores)

    print("Rank reversals:")
    rr = promethee.compute_rr_number(True)
    print("rank reversal quantity: " + str(rr))

    rr_instances = promethee.analyse_rr()
    print('rank reversal recap:')
    print(rr_instances)
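# Convenience mapping transcribed from the docstrings above (assumption: the
# rows of data/HDI/raw.csv follow this order); useful for making the printed
# indices readable.
HDI_COUNTRIES = ["Norway", "Australia", "Switzerland", "Denmark",
                 "Netherlands", "Germany", "Ireland", "United States",
                 "Canada", "New Zealand", "Singapore", "Hong Kong",
                 "Liechtenstein", "Sweden", "United Kingdom", "Iceland",
                 "Korea", "Israel", "Luxembourg", "Japan"]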
def compare_refflows():
    """Check that the ref-flow computed with the ReferencedPII object is correct."""
    data_set = 'HDI'
    random.seed()
    seed = random.randint(1, 1000)
    print(seed)
    alt_num = 20
    ref_number = 4
    strategy = prom.strategy2

    input_file = 'data/' + str(data_set) + '/raw.csv'
    alternatives = dr.open_raw(input_file)[0]

    referenced = prom.ReferencedPII(alternatives, strategy=strategy,
                                    seed=seed)
    SRP = referenced.SRP
    ref_scores = referenced.scores
    # Each alternative's ref-flow must equal its PII net flow when it is
    # ranked against the reference profiles alone
    for i, alt in enumerate(alternatives):
        SRP_alt = SRP[:]
        SRP_alt.append(alt)
        promethee = prom.PrometheeII(SRP_alt, seed=seed)
        scores = promethee.scores
        if abs(scores[-1] - ref_scores[i]) < 1e-5:
            print("ok")
        else:
            print("There is something wrong")
            print(scores)
def test_rr_counting_function():
    """Test the function computing the number of RR between two rankings.

    The rankings compared are:
        * [1, 2, 3, 4, 5, 6]
        * [6, 4, 3, 1, 5]
    There should therefore be 7 rank reversals:
        (6,1); (6,3); (6,4); (6,5); (4,3); (4,1); (3,1)
    """
    # The parameters do not matter here, we only need to initialise the object
    data_set = 'data/HDI/raw.csv'
    alts = dr.open_raw(data_set)[0]
    coeffs = [0.61224, 1.2]
    weights = [0.5, 0.5]
    promethee = prom.PrometheeII(alts, weights=weights, coefficients=coeffs)

    # Here starts the actual test
    ranking_init = [1, 2, 3, 4, 5, 6]
    ranking_new = [6, 4, 3, 1, 5]
    alt_removed = 2
    rr = promethee.compare_rankings(ranking_init, ranking_new, alt_removed)

    # Check that the arguments are not modified
    print(ranking_init)
    print(ranking_new)
    print(rr)
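# Minimal, self-contained sketch of the pairwise comparison compare_rankings
# is expected to perform (an assumption, not the repo's implementation):
# count the pairs of alternatives whose relative order differs between the
# two rankings, ignoring the removed alternative.
def count_rank_reversals_sketch(ranking_init, ranking_new, alt_removed):
    old = [a for a in ranking_init if a != alt_removed]
    pos_new = {a: i for i, a in enumerate(ranking_new)}
    rr = 0
    for i in range(len(old)):
        for j in range(i + 1, len(old)):
            # old[i] precedes old[j] initially; a rank reversal occurs when
            # their order is inverted in the new ranking
            if pos_new[old[i]] > pos_new[old[j]]:
                rr += 1
    return rr


# Reproduces the 7 reversals listed in the docstring above
assert count_rank_reversals_sketch([1, 2, 3, 4, 5, 6], [6, 4, 3, 1, 5], 2) == 7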
def test_PMV(dataset="HDI"): """Test PMV with, this time, missing values.""" data_set = 'data/' + dataset + '/raw.csv' alts = dr.open_raw(data_set)[0][:10] proportion = 0.2 seed = 1 print("complete :") prom.printmatrix(alts) original_alts = copy.deepcopy(alts) mv.delete_evaluations(alts, proportion, seed) print("incomplete :") prom.printmatrix(alts) print("Promethee:") promethee = prom.PrometheeII(original_alts, seed=seed) rank = promethee.ranking scores = promethee.scores print("PrometheeMV without missing:") prometheeMV1 = prom.PrometheeMV(original_alts, seed=seed) rankMV1 = prometheeMV1.ranking scoresMV1 = prometheeMV1.scores print("PrometheeMV:") prometheeMV = prom.PrometheeMV(alts, seed=seed) rankMV = prometheeMV.ranking scoresMV = prometheeMV.scores for i in range(len(rank)): print( str(rank[i] + 1) + '::' + str(scores[rank[i]]) + " :::: " + str(rankMV1[i] + 1) + '::' + str(scoresMV1[rank[i]]) + " :::: " + str(rankMV[i] + 1) + '::' + str(scoresMV[rank[i]]))
def test_ranking_SHA(dataset='SHA'):
    """Test that PII computes the same ranking as in the article RobustPII."""
    data_set = 'data/' + dataset + '/raw.csv'
    A, weights = dr.open_raw(data_set)[0], dr.open_raw(data_set)[1]
    # 'normalize' is assumed to be sklearn.preprocessing.normalize
    A = normalize(A, axis=0, copy=True, norm='max')
    print(A)
    A = [list(alt) for alt in A]
    # print(alts)

    weights = [0.1, 0.2, 0.2, 0.2, 0.2, 0.1]
    percentiles = (25, 75)
    promethee = prom.PrometheeII(A, weights=weights, percentiles=percentiles)
    scores = promethee.scores
    rank = promethee.ranking
    for i in range(len(rank)):
        print(str(rank[i]) + '::' + str(scores[rank[i]]))

    print(promethee.pi[5][6] / 2)
    print(promethee.alternatives[54][3])
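# Effect of the 'max' normalisation used above, on a tiny example (a sketch;
# assumes 'normalize' is sklearn.preprocessing.normalize): each column is
# divided by its maximum absolute value.
# >>> normalize([[1, 10], [2, 5]], axis=0, norm='max')
# array([[0.5, 1. ],
#        [1. , 0.5]])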
def compare(tests_qty=3):
    """Compare the different strategies."""
    output = "res/ReferencedPII/strategies/comparisons.txt"
    data_sets = ['EPI', 'SHA', 'GEQ']
    # data_sets = ['HDI']
    range_seed = range(0, 0 + tests_qty)
    alt_num = 30
    ref_number = 4
    strategies = [prom.strategy1, prom.strategy2, prom.strategy3,
                  prom.strategy4]
    # strategies = [prom.strategy2]

    kendall_taus = [[] for i in range(4)]   # One list for each strategy
    titles = []
    for data_set in data_sets:
        input_file = 'data/' + str(data_set) + '/raw.csv'
        alternatives = dr.open_raw(input_file)[0]
        for seed in range_seed:
            promethee = prom.PrometheeII(alternatives, seed=seed,
                                         alt_num=alt_num)
            prom_ranking = promethee.ranking
            title = data_set + str(seed)
            titles.append(title)
            for i, strategy in enumerate(strategies):
                referenced = prom.ReferencedPII(alternatives, seed=seed,
                                                strategy=strategy,
                                                alt_num=alt_num)
                refrank = referenced.ranking
                tau = stats.kendalltau(refrank, prom_ranking)[0]
                # Truncate (not round) to three decimals
                tau = int(tau * 1000) / 1000
                kendall_taus[i].append(tau)
    print_to_file(output, titles, kendall_taus, tests_qty)
def compare_rankings_once(all_alts, alt_num, weights, del_number, methods):
    """Compare the completion methods once on a random subset of alternatives."""
    seed = random.randint(0, 1000)
    # print('seed', seed)
    # seed = 289
    alts = random.sample(all_alts, alt_num)
    alts_inc = mv.delete_l_evaluations(alts, del_number, seed)
    # print("gapped :")
    # helpers.printmatrix(alts_inc)

    PII = prom.PrometheeII(alts, weights=weights, seed=seed)
    ranking_PII = PII.ranking

    kendall_taus = {}
    for method in methods:
        alts_completed = methods[method](alts_inc)
        score = PII.compute_netflow(alts_completed)
        ranking = PII.compute_ranking(score)
        kendall_taus[method] = stats.kendalltau(ranking_PII, ranking)[0]
    return kendall_taus
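# A minimal, hypothetical completion method usable as a value of the
# 'methods' dict above (assumption: missing evaluations are marked with
# None): replace each gap by the mean of the available evaluations on the
# same criterion.
def mean_imputation_sketch(alts_inc):
    cols = list(zip(*alts_inc))
    means = [sum(v for v in col if v is not None)
             / max(1, sum(v is not None for v in col)) for col in cols]
    return [[means[j] if v is None else v for j, v in enumerate(row)]
            for row in alts_inc]


# Example call:
# taus = compare_rankings_once(all_alts, 20, None, 5,
#                              {'mean': mean_imputation_sketch})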
def genetic_search(alternatives, seed=None, weights=None, ceils=None,
                   coefficients=None, alt_num=-1, SRP_size=4, pop_size=600,
                   mut_prob=0.01, MAXIT=50):
    """Search for reference sets reproducing PII with a genetic algorithm.

    Inputs:
        alternatives - matrix composed of one list of evaluations for each
                       alternative.
        seed - seed provided to the python pseudo-random number generator.
               It is used to create some random (w, F) for the method if
               these are not provided as arguments. See promethee.py for how
               this is done.
        weights - list of the relative importance (or weights) of all
                  criteria.
        ceils - list of the values of the strict preference thresholds for
                all criteria (p).
        coefficients - if 'ceils' is not provided, new ceils will be
                       computed as these coefficients times the amplitude
                       between the highest and lowest evaluation of each
                       criterion.
        alt_num - quantity of alternatives from 'alternatives' which must be
                  kept.
        SRP_size - quantity of reference profiles searched.
        pop_size - size of the population.
        mut_prob - probability of mutation of each evaluation of each
                   individual.
        MAXIT - maximal number of iterations of the procedure.
    """
    # Initialisation of the PrometheeII and ReferencedPII objects
    promethee = prom.PrometheeII(alternatives, seed=seed, alt_num=alt_num,
                                 ceils=ceils, weights=weights,
                                 coefficients=coefficients)
    prom_ranking = promethee.ranking

    random.seed()
    population = initial_population(alternatives, pop_size, SRP_size)
    referenced = prom.ReferencedPII(alternatives, seed=seed, alt_num=alt_num,
                                    ceils=ceils, weights=weights,
                                    ref_set=population[0],
                                    coefficients=coefficients)

    evaluations = compute_evaluations(population, prom_ranking, referenced)
    best_score = max(evaluations)
    best_SRP_ever = population[evaluations.index(best_score)]
    it = 0
    while (abs(best_score - 1) > 1e-5 and it < MAXIT):
        # print("it:" + str(it) + ' best score:' + str(best_score))
        parents = chose_parents(population, evaluations, pop_size)
        population = combine_parents(parents)
        population = mutate_population(population, mut_prob)
        evaluations = compute_evaluations(population, prom_ranking,
                                          referenced)
        if max(evaluations) > best_score:
            best_score = max(evaluations)
            best_SRP_ever = population[evaluations.index(best_score)]
        it += 1
    return best_score
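# Hedged sketch of the mutation step named above (an assumption about the
# repo's mutate_population, not its actual code): the population is a list
# of SRPs, each SRP a list of reference profiles, and every evaluation is
# redrawn with probability mut_prob within the observed range of its
# criterion.
def mutate_population_sketch(population, mut_prob, min_per_crit,
                             max_per_crit):
    for SRP in population:
        for ref in SRP:
            for c in range(len(ref)):
                if random.random() < mut_prob:
                    ref[c] = random.uniform(min_per_crit[c],
                                            max_per_crit[c])
    return population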
def __init__(self, init_alternatives, seed=0, alt_num=30, ref_number=4,
             pts_per_random_it=200, random_add_it=500, divide_it=5,
             desired_points=3000):
    """Constructor.

    Inputs:
        init_alternatives - matrix composed of one list of evaluations for
                            each alternative.
        seed - used to generate some pseudo-random parameters.
        alt_num - maximal number of alternatives on which the procedure must
                  be applied.
        ref_number - number of reference profiles in each set.
        pts_per_random_it - minimal quantity of points which we try to add
                            at random 'simultaneously'. This quantity is
                            repeated 'random_add_it' times at each iteration
                            of the procedure.
        random_add_it - quantity of times at each iteration of the procedure
                        that 'pts_per_random_it' points are considered for
                        addition to the set of all admissible points.
        divide_it - number of times we try to add a new point near an
                    admissible one (for each of the admissible ones).
        desired_points - desired size of the set of admissible points after
                         each iteration.

    The last four arguments are used because it is computationally not
    possible to start with a big enough set of admissible points; therefore,
    some points are added at each iteration. More information in the
    'round_add_points' function.
    """
    self.ref_number = ref_number
    self.pts_per_random_it = pts_per_random_it
    self.desired_points = desired_points
    self.seed = seed
    self.random_add_it = random_add_it
    self.divide_it = divide_it

    self.promethee = PII.PrometheeII(init_alternatives, seed=self.seed,
                                     alt_num=alt_num)
    self.PII_ranking = self.promethee.ranking
    self.alternatives = self.promethee.alternatives

    # Used to add new points
    self.min_per_crit = [min(crit) for crit in self.promethee.eval_per_crit]
    self.max_per_crit = [max(crit) for crit in self.promethee.eval_per_crit]
    self.delta_per_crit = [
        self.max_per_crit[crit] - self.min_per_crit[crit]
        for crit in range(len(self.max_per_crit))
    ]
    self.crit_number = len(self.promethee.alternatives[0])

    # This SRP is only used to initialise the referenced promethee object
    SRP = [[1 for i in range(self.crit_number)] for r in range(ref_number)]
    self.referenced = PII.ReferencedPII(init_alternatives, seed=self.seed,
                                        alt_num=alt_num, ref_set=SRP)

    if (not PII.check_parameters(self.promethee, self.referenced)):
        print('parameters not equal between methods')
        exit()

    # This list contains all points which are still admissible at any given
    # iteration but which do not exactly reproduce the PII ranking. Points
    # reproducing the PII ranking are kept in another list for performance
    # purposes.
    self.admissible_points = []
    self.correct_points = []
    self.constraints = []

    # Matrix that keeps track of all the rankings (one list per iteration)
    self.kendall_taus = []

    self.add_initial_points()

    # Template for printing the iteration analysis
    self.it_template = "{:^3d}|{: ^9d}|{: ^10d}|" \
        + "{:^7d}|{: ^7.3f}|{: ^7.3f}|{: ^7.3f}|{: ^7.3f}|{: ^10s}|{: ^9d}"
    self.iteration = 0
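# Example construction (a sketch mirroring the call made in SRP_from_aqp
# below; 'all_alts' is assumed to be a matrix of evaluations):
# procedure = aqp.Adaptive_procedure(all_alts, seed=0, alt_num=20,
#                                    ref_number=4, pts_per_random_it=200,
#                                    desired_points=3000)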
def SRP_from_aqp(data_set="GEQ", seeds=range(3), alt_num=20): """Analyse the correct SRP found with this procedure.""" alts_file_name = "data/" + data_set + "/raw.csv" all_alts = dr.open_raw(alts_file_name)[0] mean_mean_ratio_str = [] var_var_ratio_str = [] mean_var_ratio_str = [] var_mean_ratio_str = [] template_ratio = '{0:^d}|' for i in range(len(all_alts[0])): template_ratio += '{' + str(i + 1) + ':+.3F}|' # Output output_file = "res/ReferencedPII/SRP_analysis/" + data_set for seed in seeds: # Input SRP_prefix = "res/ReferencedPII/adaptive_questioning_procedure/" all_SRP_file_name = data_set + "/" + str(seed) + ".csv" all_SRP = dr.open_raw_RS(SRP_prefix + all_SRP_file_name) # get the correct alt_num for the concerned seed promethee = prom.PrometheeII(all_alts, seed=seed, alt_num=alt_num) alts_per_criterion = list(map(list, zip(*promethee.alternatives))) # Check if the parameteres (= alternative subset) are indeed the same questioning_procedure = aqp.Adaptive_procedure(all_alts, seed=seed, alt_num=alt_num, ref_number=4, pts_per_random_it=200, desired_points=3000) if (not prom.check_parameters(questioning_procedure.promethee, promethee)): print("error") """Will contain lists of means of the ref's evaluation for each criterion ex: all_means_ratio[0] = [mean(c1(r1), ..., mean(c2(r1), ..., c2(r4))] SRP_means[2] = [...] """ # List of all ratios for individual SRP all_mean_ratios = [] all_var_ratios = [] for i in range(len(all_SRP)): SRP = all_SRP[i] # matrix = list of criteria which are lists of refs or # alternatives evaluations refs_per_criterion = list(map(list, zip(*SRP))) # ratio between estimator of on SRP compared to the one of the alts individual_mean_ratios, individual_var_ratios = [], [] for crit in range(len(refs_per_criterion)): var_ref = numpy.var(refs_per_criterion[crit]) mean_ref = numpy.mean(refs_per_criterion[crit]) var_alt = numpy.var(alts_per_criterion[crit]) mean_alt = numpy.mean(alts_per_criterion[crit]) individual_mean_ratios.append(mean_ref / mean_alt) individual_var_ratios.append(var_ref / var_alt) all_mean_ratios.append(individual_mean_ratios) all_var_ratios.append(individual_var_ratios) # transpose the matrix : a list of references sets which are lists # of the estimators for each criterion becomes a list of estimators for # each criterion which contains the estimater for each SRP var_ratios_per_crit = list(map(list, zip(*all_var_ratios))) mean_ratios_per_crit = list(map(list, zip(*all_mean_ratios))) var_var_ratios = [numpy.var(crit) for crit in var_ratios_per_crit] mean_var_ratios = [numpy.mean(crit) for crit in var_ratios_per_crit] var_mean_ratios = [numpy.var(crit) for crit in mean_ratios_per_crit] mean_mean_ratios = [numpy.mean(crit) for crit in mean_ratios_per_crit] # Transorm in strings var_var_ratio_str.append(template_ratio.format(seed, *var_var_ratios)) var_mean_ratio_str.append(template_ratio.format( seed, *var_mean_ratios)) mean_var_ratio_str.append(template_ratio.format( seed, *mean_var_ratios)) mean_mean_ratio_str.append( template_ratio.format(seed, *mean_mean_ratios)) with open(output_file, 'a') as output: output.write("var(var(ref)/var(alt)) \n") for i in var_var_ratio_str: output.write(i) output.write("\n") output.write("\n") output.write("var(mean(ref)/mean(alt)) \n") for i in var_mean_ratio_str: output.write(i) output.write("\n") output.write("\n") output.write("mean(var(ref)/var(alt)) \n") for i in mean_var_ratio_str: output.write(i) output.write("\n") output.write("\n") output.write("mean(mean(ref)/mean(alt)) \n") for i in mean_mean_ratio_str: 
output.write(i) output.write("\n") output.write("\n")
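# The transpose idiom used throughout SRP_from_aqp, isolated for clarity (a
# sketch; 'transpose' is not a repo helper): it turns a list of rows
# (alternatives or references) into a list of columns (criteria).
def transpose(matrix):
    return list(map(list, zip(*matrix)))


assert transpose([[1, 2], [3, 4], [5, 6]]) == [[1, 3, 5], [2, 4, 6]]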
def count_rr(data='HDI', max_rep=10, R_parameter=None, m_parameter=None):
    """Test the number of rank reversals."""
    # Parameter initialisation, the interesting stuff is way lower
    R_list = R_parameter
    m_list = m_parameter
    if (data == 'HDI'):
        # Change these parameters if needed
        if (R_list is None):
            R_list = [500, 1000, 5000, 10000]
        if (m_list is None):
            m_list = [3, 5, 6, 7, 8, 10, 15]
        # Do not change these parameters! They are not saved
        data_set = 'data/HDI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.5, 0.5]
        ceils = [3, 3]
        seed = 0            # Not used, here to match the general signature
    elif (data == 'SHA'):
        # Change these parameters if needed
        if (R_list is None):
            R_list = [1000, 4000, 7000, 12000]
        if (m_list is None):
            m_list = [4, 6, 8, 9, 12, 15, 18]
        # Do not change these parameters! They are not saved
        data_set = 'data/SHA/raw_20.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]
        ceils = [17.100, 23.7750, 26.100, 27.3750, 17.9250, 13.5750]
        seed = 0            # Not used, here to match the general signature
    else:
        data = 'EPI'
        # Change these parameters if needed
        if (R_list is None):
            R_list = [500, 1000, 5000, 8000]
        if (m_list is None):
            m_list = [3, 4, 7, 9, 12, 14, 16, 18]
        # Do not change these parameters! They are not saved
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        weights, ceils = None, None
        seed = 0

    output_dir = 'res/RobustPII/R_m_influence/'
    output = output_dir + data + '.txt'

    promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils,
                                 seed=seed)
    rr_promethee = promethee.compute_rr_number()

    # rr_matrix[i][j] is the average number of rank reversals over 'max_rep'
    # runs for the parameters (R_list[i], m_list[j])
    rr_matrix = []
    for R in R_list:
        rr_row = []
        for m in m_list:
            rr = 0
            for repetition in range(max_rep):
                random.seed()
                robust = prom.RobustPII(alts, weights=weights, ceils=ceils,
                                        seed=seed, R=R, m=m)
                rr += robust.compute_rr_number()
            rr = rr / max_rep
            rr_row.append(rr)
        print(rr_row)
        rr_matrix.append(rr_row)

    print_rr_to_file(output, rr_matrix, R_list, m_list, rr_promethee,
                     max_rep)
def analyse_rr(data='SHA', max_rep=20, R_parameter=None, m_parameter=None):
    """Analyse the rank reversals occurring in RobustPII."""
    if (data == 'HDI'):
        print('try with another dataset')
        exit()
    elif (data == 'SHA'):
        R = 5000
        m = 9
        # Do not change these parameters! They are not saved
        data_set = 'data/SHA/raw_20.csv'
        alts = dr.open_raw(data_set)[0]
        weights = [0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]
        ceils = [17.100, 23.7750, 26.100, 27.3750, 17.9250, 13.5750]
        seed = 1
    else:
        data = 'EPI'
        R = 5000
        m = 16
        # Do not change these parameters! They are not saved
        data_set = 'data/EPI/raw.csv'
        alts = dr.open_raw(data_set)[0]
        alts = alts[0:20]
        weights, ceils = None, None
        seed = 0

    if R_parameter is not None:
        R = R_parameter
    if m_parameter is not None:
        m = m_parameter

    output = 'res/RobustPII/analyse_rank_reversals/' + str(data) + '.txt'

    promethee = prom.PrometheeII(alts, weights=weights, ceils=ceils,
                                 seed=seed)
    promethee_rr_instances = promethee.analyse_rr()

    # Accumulate, for each pair of alternatives, the number of repetitions in
    # which a rank reversal occurred
    all_rr_instances = dict()
    for repetition in range(max_rep):
        robust = prom.RobustPII(alts, weights=weights, ceils=ceils,
                                seed=seed, R=R, m=m)
        rr_instances = robust.analyse_rr()
        for key in rr_instances:
            all_rr_instances[key] = \
                all_rr_instances.get(key, 0) + rr_instances.get(key)

    all_info = []
    key_set = set(all_rr_instances.keys()) | set(promethee_rr_instances.keys())
    for key in key_set:
        line = [key[0], key[1],
                all_rr_instances.get(key, 0) / max_rep,
                promethee_rr_instances.get(key, 0),
                abs(promethee.scores[key[0]] - promethee.scores[key[1]]),
                abs(robust.scores[key[0]] - robust.scores[key[1]])]
        all_info.append(line)

    print_to_file(output, all_info, promethee.scores, robust.scores,
                  max_rep, R, m)
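# The per-pair accumulation above, restated with collections.Counter
# (an illustrative sketch, equivalent to the dict-based loop):
from collections import Counter


def merge_rr_counts(dicts):
    """Sum per-pair rank-reversal counts across repetitions."""
    total = Counter()
    for d in dicts:
        total.update(d)
    return dict(total)


assert merge_rr_counts([{(0, 1): 1}, {(0, 1): 2, (2, 3): 1}]) \
    == {(0, 1): 3, (2, 3): 1}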