def run_experiment_bio_orphanet(dataset_path: str, raw_data=False, figures=False):
    # sets the values of b5-b4 to consider (note that b4 is set to 0)
    values_b5 = [0., 0.25, 0.5, 0.75, 1., 2.]
    kcfs = []
    # creation of the scoring schemes (the KCFs)
    for value_b5 in values_b5:
        kcfs.append(ScoringScheme([[0., 1., 1., 0., value_b5, 0.],
                                   [1., 1., 0., value_b5, value_b5, 0.]]))
    exp1 = ExperimentOrphanet(
        dataset_folder=dataset_path,
        # the kcfs to consider
        scoring_schemes=kcfs,
        # the top-k to consider
        top_k_to_test=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110],
        # algorithm to compute the consensus
        algo=get_algorithm(alg=Algorithm.ParCons,
                           parameters={"bound_for_exact": 150,
                                       "auxiliary_algorithm": get_algorithm(alg=Algorithm.BioConsert)}),
        # selects all the tuples of rankings with at least 100 elements and 3 rankings
        dataset_selector=DatasetSelector(nb_elem_min=100, nb_rankings_min=3),
    )
    # run the experiment and print the results. If raw_data is True, also print all the parameters of the
    # experiment (readme) and the raw data used to compute the final data; otherwise only the final data is displayed
    exp1.run(raw_data, figures=figures)
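
# Hypothetical usage sketch (not part of the original code): the dataset path below is a placeholder
# to adapt to a local copy of the biological datasets (the "supplementary_data" folder is expected two
# levels above it, as in ExperimentOrphanet.__init__).
# run_experiment_bio_orphanet(dataset_path="path/to/vldb_data/datasets/biological_dataset",
#                             raw_data=True, figures=False)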
    def __init__(self,
                 nb_years: int,
                 nb_students_track1: int,
                 nb_students_track2: int,
                 nb_classes_total: int,
                 nb_classes_track1: int,
                 nb_classes_track2: int,
                 mean_track1: float,
                 variance_track1: float,
                 mean_track2: float,
                 variance_track2: float,
                 topk: int,
                 scoring_schemes: List[ScoringScheme],
                 algo: MedianRanking = get_algorithm(
                     Algorithm.ParCons,
                     parameters={"bound_for_exact": 150,
                                 "auxiliary_algorithm": get_algorithm(alg=Algorithm.BioConsert)})):
        super().__init__()
        self.__alg = algo
        self.__scoring_schemes = scoring_schemes
        self.__nb_years = nb_years
        self.__nb_students_track_1 = nb_students_track1
        self.__nb_students_track_2 = nb_students_track2
        self.__nb_classes_total = nb_classes_total
        self.__nb_classes_track_1 = nb_classes_track1
        self.__nb_classes_track_2 = nb_classes_track2
        self.__mean_track1 = mean_track1
        self.__variance_track1 = variance_track1
        self.__mean_track2 = mean_track2
        self.__variance_track2 = variance_track2
        self.__topk = topk
    def __init__(self,
                 dataset_folder: str,
                 scoring_schemes_exp: List[ScoringScheme],
                 changing_coeff: Tuple[int, int],
                 intervals: List[Tuple[int, int]] = None,
                 dataset_selector_exp: DatasetSelector = None):
        super().__init__(dataset_folder, dataset_selector_exp)
        self.__scoring_schemes = scoring_schemes_exp
        self.__alg = get_algorithm(alg=Algorithm.ParCons,
                                   parameters={"bound_for_exact": 0,
                                               "auxiliary_algorithm": get_algorithm(alg=Algorithm.AllTied)})
        self.__changing_coeff = changing_coeff
        if intervals is not None:
            self.__intervals = intervals
        else:
            # default interval: from the smallest to the largest dataset (in number of elements)
            max_n = self.datasets[0].nb_elements
            min_n = self.datasets[0].nb_elements
            for dataset in self.datasets:
                if dataset.nb_elements > max_n:
                    max_n = dataset.nb_elements
                if dataset.nb_elements < min_n:
                    min_n = dataset.nb_elements
            self.__intervals = [(min_n, max_n)]
def run_bench_time_alg_exacts_vldb(path_dataset: str, raw_data=False, figures=False):
    # get the scoring schemes (the KCFs)
    kcf1 = ScoringScheme.get_unifying_scoring_scheme_p(1.)
    kcf2 = ScoringScheme.get_extended_measure_scoring_scheme()
    kcf3 = ScoringScheme.get_induced_measure_scoring_scheme_p(1.)
    kcf4 = ScoringScheme.get_pseudodistance_scoring_scheme_p(1.)
    kcfs = [kcf1, kcf2, kcf3, kcf4]

    # optimize = optim1, preprocess = optim2
    ea = get_algorithm(alg=Algorithm.Exact, parameters={"optimize": False, "preprocess": False})
    ea_optim1 = get_algorithm(alg=Algorithm.Exact, parameters={"optimize": True, "preprocess": False})
    ea_optim1_optim2 = get_algorithm(alg=Algorithm.Exact, parameters={"optimize": True, "preprocess": True})
    algorithms_for_bench = [ea, ea_optim1, ea_optim1_optim2]

    # run the experiment for each scoring scheme (KCF)
    for kcf in kcfs:
        bench = BenchTime(
            dataset_folder=path_dataset,
            # algorithms for the time benchmark
            algs=algorithms_for_bench,
            # the scoring scheme, that is the kcf to consider
            scoring_scheme=kcf,
            # select tuples of rankings with between 30 and 119 elements and at least 3 rankings
            dataset_selector_exp=DatasetSelector(nb_elem_min=30, nb_elem_max=119, nb_rankings_min=3),
            # range of dataset sizes for the output
            steps=10,
            # re-compute the consensus until the cumulated computation time exceeds 1 sec;
            # the average computation time is then returned
            repeat_time_computation_until=1.)
        # run the experiment and print the results. If raw_data is True, also print all the parameters of the
        # experiment (readme) and the raw data used to compute the final data; otherwise only the final data
        # is displayed
        bench.run(raw_data, figures=figures)
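
# Hedged illustration (not part of the original benchmark code): the core measurement that BenchTime
# automates, namely timing one consensus computation of the exact algorithm. The function name and the
# toy rankings below are purely illustrative.
def example_time_single_exact_consensus():
    import time
    toy_dataset = Dataset([[[1], [2, 3]], [[3, 1], [4]], [[1], [5], [3, 2]]])
    exact_alg = get_algorithm(alg=Algorithm.Exact, parameters={"optimize": True, "preprocess": True})
    start = time.time()
    exact_alg.compute_consensus_rankings(dataset=toy_dataset,
                                         scoring_scheme=ScoringScheme.get_unifying_scoring_scheme_p(1.),
                                         return_at_most_one_ranking=True)
    # elapsed wall-clock time for a single consensus computation, in seconds
    print("computation time (s):", time.time() - start)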
    def _run_raw_data(self) -> str:
        res = ""
        for dataset in self.datasets:
            # frontiers obtained by partitioning the dataset for the given scoring scheme
            frontiers = ParFront().compute_frontiers(dataset, self.__scoring_scheme)
            alg = get_algorithm(Algorithm.CopelandMethod)
            consensus = alg.compute_consensus_rankings(dataset, self.__scoring_scheme, True)
            print(frontiers.consistent_with(consensus))
        return res
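
# Hedged illustration (not part of the original experiment): the same consistency check that
# _run_raw_data performs, applied to a single toy dataset with the default scoring scheme.
# The function name and the toy rankings are illustrative only.
def example_check_frontiers_consistency():
    toy_dataset = Dataset([[[1], [2, 3]], [[3, 1], [4]], [[1], [5], [3, 2]]])
    sc = ScoringScheme()
    frontiers = ParFront().compute_frontiers(toy_dataset, sc)
    consensus = get_algorithm(Algorithm.CopelandMethod).compute_consensus_rankings(toy_dataset, sc, True)
    # True if the consensus ranking respects the frontiers computed by the partitioning
    print(frontiers.consistent_with(consensus))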
def run_experiment_students_vldb(raw_data=False, figures=False):
    # seed 1 is set for python and numpy
    random.seed(1)
    np.random.seed(1)

    # sets the values of b5-b4 to consider (note that b4 is set to 0)
    # values_b5 = [0., 0.25, 0.5, 0.75, 1., 2.]
    values_b5 = [0.]
    kcfs = []
    # creation of the scoring schemes (the KCFs)
    for value_b5 in values_b5:
        kcfs.append(ScoringScheme([[0., 1., 1., 0., value_b5, 0.],
                                   [1., 1., 0., value_b5, value_b5, 0.]]))
    """
    The parameters are all the ones detailed in the research paper: 100 student classes, each with
    280 students from track 1 and 20 students from track 2. In track 1, students choose uniformly
    14 classes out of 17; in track 2, they choose uniformly 9 classes out of the same 17. The marks
    obtained by students of track 1 follow N(10, 5*5) and those of track 2 follow N(16, 4*4).
    Evaluation is made using the top-20 of the consensuses.
    """
    exp = MarksExperiment(
        # number of tuples of rankings to create
        nb_years=100,
        # number of students in track1
        nb_students_track1=280,
        # number of students in track2
        nb_students_track2=20,
        # number of classes the students can choose
        nb_classes_total=17,
        # number of classes the students of track1 choose (uniformly)
        nb_classes_track1=14,
        # number of classes the students of track2 choose (uniformly)
        nb_classes_track2=9,
        # mean mark of students in track1 for each class (normal distribution)
        mean_track1=10,
        # square of the standard deviation of the marks of track1 students for each class
        variance_track1=5,
        # mean mark of students in track2 for each class (normal distribution)
        mean_track2=16,
        # square of the standard deviation of the marks of track2 students for each class
        variance_track2=4,
        # top-k to consider for the experiment (comparison between consensus and overall average)
        topk=20,
        # kcfs to consider
        scoring_schemes=kcfs,
        # algorithm to compute the consensus
        algo=get_algorithm(Algorithm.CopelandMethod))
    # run the experiment and print the results. If raw_data is True, also print all the parameters of the
    # experiment (readme) and the raw data used to compute the final data; otherwise only the final data is displayed
    exp.run(raw_data, figures)
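
# Hypothetical usage sketch: the student marks are simulated (seeded normal distributions), so this
# experiment needs no dataset path and can be launched directly, for instance:
# run_experiment_students_vldb(raw_data=True, figures=True)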
    def __init__(self,
                 dataset_folder: str,
                 scoring_schemes: List[ScoringScheme],
                 top_k_to_test: List[int],
                 algo: MedianRanking = get_algorithm(Algorithm.ParCons, parameters={"bound_for_exact": 150}),
                 dataset_selector: DatasetSelector = None):
        super().__init__(dataset_folder=dataset_folder, dataset_selector=dataset_selector)
        self.__orphanetParser = OrphanetParser.get_orpha_base_for_vldb(
            join_paths(get_parent_path(get_parent_path(dataset_folder)), "supplementary_data"))
        self.__algo = algo
        self.__remove_useless_datasets()
        self.__consensus = {}
        self.__scoring_schemes = scoring_schemes
        self.__top_k_to_test = top_k_to_test
def run_bench_exact_optimized_scoring_scheme_vldb(path_dataset: str, raw_data=False, figures=False):
    # get the scoring schemes (the KCFs)
    kcf1 = ScoringScheme.get_unifying_scoring_scheme_p(1.)
    kcf2 = ScoringScheme.get_extended_measure_scoring_scheme()
    kcf3 = ScoringScheme.get_induced_measure_scoring_scheme_p(1.)
    kcf4 = ScoringScheme.get_pseudodistance_scoring_scheme_p(1.)
    kcfs = [kcf1, kcf2, kcf3, kcf4]

    # optimize = optim1, preprocess = optim2
    ea_optim1_optim2 = get_algorithm(alg=Algorithm.Exact, parameters={"optimize": True, "preprocess": True})

    # benchmark the fully optimized exact algorithm over all the scoring schemes (KCFs)
    bench = BenchScalabilityScoringScheme(
        dataset_folder=path_dataset,
        # the algorithm to consider
        alg=ea_optim1_optim2,
        # the kcfs to consider
        scoring_schemes=kcfs,
        # the dataset selector for selection according to the size
        dataset_selector_exp=DatasetSelector(nb_elem_min=130, nb_elem_max=300, nb_rankings_min=3),
        # range of dataset sizes for the output
        steps=10,
        # maximum computation time allowed: for each kcf, the computation stops
        # as soon as a tuple of rankings exceeds this time
        max_time=600,
        # re-compute the consensus until the cumulated computation time exceeds this threshold and return
        # the average time; here 0, so each consensus is computed a single time
        repeat_time_computation_until=0)
    # run the experiment and print the results. If raw_data is True, also print all the parameters of the
    # experiment (readme) and the raw data used to compute the final data; otherwise only the final data is displayed
    bench.run(raw_data, figures)
dataset = Dataset([[[1], [2, 3]], [[3, 1], [4]], [[1], [5], [3, 2]]])
# or d = Dataset.get_rankings_from_file(file_path), where file_path is the path to the file
# import a list of datasets from the same folder: Dataset.get_rankings_from_folder(path_folder)

# print information about the dataset
print(dataset.description())

# choose your scoring scheme (or sc = ScoringScheme() for the default scoring scheme)
sc = ScoringScheme([[0., 1., 1., 0., 1., 1.],
                    [1., 1., 0., 1., 1., 0.]])
print("scoring scheme : " + str(sc))
# scoring scheme description
print(sc.description())

print("\n### Consensus computation ###\n")

algorithm = get_algorithm(alg=Algorithm.ParCons, parameters={"bound_for_exact": 90})
# compute consensus ranking
consensus = algorithm.compute_consensus_rankings(dataset=dataset,
                                                 scoring_scheme=sc,
                                                 return_at_most_one_ranking=False)
print(consensus.description())
# if you want the consensus ranking only: print(consensus)
# to get the consensus rankings: consensus.consensus_rankings

# list of rank aggregation algorithms to use among BioConsert, ParCons, ExactAlgorithm, KwikSortRandom,
# RepeatChoice, PickAPerm, MedRank, BordaCount, BioCo, CopelandMethod
algorithms_to_execute = [get_algorithm(alg=Algorithm.KwikSortRandom),
                         get_algorithm(alg=Algorithm.BioConsert,
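
# Hypothetical usage sketch: once the list of algorithms has been built, each one can compute its own
# consensus on the same dataset and scoring scheme, for instance:
# for alg in algorithms_to_execute:
#     print(alg.compute_consensus_rankings(dataset=dataset, scoring_scheme=sc,
#                                          return_at_most_one_ranking=True).description())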
        for element in dataset.elements:
            h_gene_list_scores[element] = []
        shuffle(dataset.rankings)
        for i in to_test:
            dataset_new = Dataset(dataset.rankings[0:i])
            dataset_new.name = dataset.name
            consensus = self._algo.compute_consensus_rankings(dataset_new, self._scoring_cheme, True)
            copeland_scores = consensus.copeland_scores
            for element in dataset_new.elements:
                cop_score_element = copeland_scores.get(element)
                h_gene_list_scores[element].append(cop_score_element)
        for element in dataset.elements:
            res += dataset.name + ";" + str(element) + ";" + str(h_gene_list_scores[element]) + "\n"
        return res


algor = get_algorithm(Algorithm.CopelandMethod)
scoring_scheme_exp = ScoringScheme.get_pseudodistance_scoring_scheme_p(1.)
"""
rates_presence_min = [0.2]
ic_rates = [0.05]
for rate_presence_minimal in rates_presence_min:
    for ic_rate in ic_rates:
        print(ic_rate)
        print(rate_presence_minimal)
        b = BootstrapExperimentBiologicalIC(
            dataset_folder="/home/pierre/Bureau/vldb_data/datasets/biological_dataset",
            algo=algor,
            scoring_scheme=scoring_scheme_exp,
            nb_bootstrap=10000,
            dataset_selector=DatasetSelector(