def test_runtimes_ncobras():
    """Create and execute the nCOBRAS runtime experiment at 5% query noise.

    A COBRAS clusterer configured for a noise probability of 0.05 is paired
    with a ``ProbabilisticNoisyQuerier`` (noise 0.05, query budget 100).
    Cross-validation tasks are generated for every standard dataset under the
    test name ``ncobras_0.05noise_runtimes`` and then executed in batches of
    200.
    """
    ncobras = COBRAS(
        noise_probability=0.05,
        minimum_approximation_order=3,
        maximum_approximation_order=10,
        certainty_threshold=0.95,
    )
    noisy_querier = ProbabilisticNoisyQuerier(None, None, 0.05, 100)
    runtime_tasks = make_n_run_10_fold_cross_validation(
        "ncobras_0.05noise_runtimes",
        ncobras,
        noisy_querier,
        Dataset.get_standard_dataset_names(),
        3,
        cobras_result_extractor,
    )
    execute_list_of_clustering_tasks(runtime_tasks, tests_per_batch=200)
def NPU_MPCKMeans_VaryingAmountsOfNoise():
    """Run NPU + MPCKMeans under several noise levels and compare the results.

    For each noise level a 10x10-fold test is registered on a
    ``TestCollection``, the collection is run on ``MACHINES_TO_USE`` over SSH,
    and finally the ARIs of all registered tests are computed and compared.

    A noise percentage of ``-1`` is the marker for "no noise" and is rendered
    as ``"no"`` in the test name.
    """
    print("making tests")
    tests = TestCollection()
    query_budget = 200
    noise_percentages = [-1, 0.05, 0.10, 0.20]

    # Collect the names exactly as they are registered. ``str(0.10)`` is
    # ``"0.1"`` (not ``"0.10"``), so a hand-written list of names silently
    # drifts away from the names of the tests that were actually run.
    test_names = []
    for noise_percentage in noise_percentages:
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        test_name = "NPU_MPCKmeans_{}_noise_budget{}".format(noise_text, query_budget)
        test_names.append(test_name)
        tests.add_10_times_10_fold_test(
            test_name,
            "NPU_MPCKmeans",
            mpck_means_algorithm_settings_to_string(),
            Dataset.get_standard_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(noise_percentage, query_budget))

    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)

    comparison_name = "NPU_MPCKmeans_varying_amounts_of_noise"
    # Display labels for the comparison, one per entry of ``test_names``
    # (same order as ``noise_percentages``).
    line_names = [
        "NPU_MPCKmeans_no_noise",
        "NPU_MPCKmeans_0.05_noise",
        "NPU_MPCKmeans_0.10_noise",
        "NPU_MPCKmeans_0.20_noise"
    ]
    calculate_aris_and_compare_for_tests(comparison_name, test_names, line_names, query_budget=query_budget)
def test_varying_amounts_of_noise_cobras():
    """Run plain COBRAS (noise correction disabled) under varying query noise.

    Tasks are generated by ``test_varying_amounts_of_noise`` under the base
    name ``new_cobras`` for all standard datasets, with a query budget of 200
    and 3 runs, and are then executed in batches of 200.
    """
    # Plain COBRAS: the noise-correcting behaviour is switched off.
    plain_cobras = COBRAS(correct_noise=False)
    standard_datasets = Dataset.get_standard_dataset_names()
    run_count = 3

    tasks = list(
        test_varying_amounts_of_noise(
            "new_cobras",
            plain_cobras,
            cobras_result_extractor,
            dataset_names=standard_datasets,
            query_budget=200,
            nb_of_runs=run_count,
        )
    )
    execute_list_of_clustering_tasks(tasks, tests_per_batch=200)
def test_varying_amounts_of_noise_NPU_COSC():
    """Run NPU wrapped around the fast COSC (Matlab) variant under varying noise.

    The ``hepatitis`` dataset is removed from the standard dataset list before
    the tasks are generated. Tasks are created under the base name
    ``new_NPU_COSC_fast`` with a query budget of 200 and 3 runs, then executed
    in batches of 200.
    """
    # Imported locally, as in the original, so these modules are only loaded
    # when this experiment is actually run.
    from generate_clusterings.algorithms.my_cosc import MyCOSCMatlab
    from generate_clusterings.algorithms.my_npu import NPU

    npu_cosc = NPU(MyCOSCMatlab(run_fast_version=True), debug=True)

    selected_datasets = Dataset.get_standard_dataset_names()
    # In-place removal; raises ValueError if "hepatitis" is not in the list.
    selected_datasets.remove("hepatitis")
    run_count = 3

    tasks = list(
        test_varying_amounts_of_noise(
            "new_NPU_COSC_fast",
            npu_cosc,
            cosc_result_extractor,
            dataset_names=selected_datasets,
            query_budget=200,
            nb_of_runs=run_count,
        )
    )
    execute_list_of_clustering_tasks(tasks, tests_per_batch=200)
def cobras_minimal_vs_all_cycles_test():
    """Compare two COBRAS configurations that differ only in one settings flag.

    Registers the tests ``ncobras_minimal_cycles`` and ``ncobras_all_cycles``
    (10x10-fold, all standard datasets, noisy querier with noise 0.10 and
    budget 25), runs them over SSH on 5 computers, and compares their ARIs.
    """
    print("making tests")
    collection = TestCollection()

    # Each entry: (test name, value of the final COBRAS settings argument).
    # NOTE(review): the final argument presumably toggles "all cycles" vs
    # "minimal cycles" -- confirm against cobras_algorithm_settings_to_string.
    variants = (
        ("ncobras_minimal_cycles", False),
        ("ncobras_all_cycles", True),
    )
    for variant_name, last_flag in variants:
        collection.add_10_times_10_fold_test(
            variant_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, True, last_flag),
            Dataset.get_standard_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(0.10, 25),
        )

    run_tests_over_SSH_on_machines(collection, nb_of_computers=5)

    # No explicit line names are supplied for the comparison.
    calculate_aris_and_compare_for_tests(
        "ncobras_minimal_vs_all_cycles",
        ["ncobras_minimal_cycles", "ncobras_all_cycles"],
        None,
    )
def test_varying_amounts_of_noise(test_name_raw, clusterers, result_extractor, query_budget=200, nb_of_runs=3,
                                  noise_percentages=(-1, 0.05, 0.1),
                                  dataset_names=None):
    """Build cross-validation tasks for one algorithm at several noise levels.

    Args:
        test_name_raw: Base name; each generated test is named
            ``{test_name_raw}_{noise}_budget_{query_budget}``, where ``noise``
            is ``"no"`` for a noise percentage of ``-1`` and
            ``str(noise_percentage)`` otherwise.
        clusterers: Either a list with one clusterer per entry of
            ``noise_percentages``, or a single (non-list) clusterer that is
            reused for every noise level.
        result_extractor: Passed through to
            ``make_n_run_10_fold_cross_validation``.
        query_budget: Query budget given to each ``ProbabilisticNoisyQuerier``.
        nb_of_runs: Passed through to ``make_n_run_10_fold_cross_validation``.
        noise_percentages: Noise levels to generate tasks for; ``-1`` marks
            the no-noise setting.
        dataset_names: Datasets to run on. ``None`` (the default) means
            ``Dataset.get_standard_dataset_names()``, looked up at call time.

    Returns:
        A list with the tasks for all noise levels, in the order of
        ``noise_percentages``.

    Raises:
        ValueError: If a list of clusterers is given whose length differs
            from the number of noise percentages.
    """
    # Resolve the default per call instead of once at definition time, so the
    # dataset lookup does not run on import and no default list is shared.
    if dataset_names is None:
        dataset_names = Dataset.get_standard_dataset_names()

    if not isinstance(clusterers, list):
        # Reuse the single clusterer for every noise level, however many
        # levels were requested (previously hard-coded to 3).
        clusterers = [clusterers] * len(noise_percentages)

    if len(clusterers) != len(noise_percentages):
        # Explicit check: zip() below would otherwise silently drop entries,
        # and an assert would vanish when running with -O.
        raise ValueError(
            f"expected one clusterer per noise percentage, got "
            f"{len(clusterers)} clusterers for {len(noise_percentages)} noise percentages")

    test_collection = []
    for clusterer, noise_percentage in zip(clusterers, noise_percentages):
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        test_name = f"{test_name_raw}_{noise_text}_budget_{query_budget}"
        querier = ProbabilisticNoisyQuerier(None, None, noise_percentage, query_budget)
        additional_tasks = make_n_run_10_fold_cross_validation(test_name, clusterer, querier, dataset_names,
                                                               nb_of_runs, result_extractor)
        test_collection.extend(additional_tasks)
    return test_collection
def test_varying_amounts_of_noise_ncobras():
    """Run nCOBRAS under varying query noise with one clusterer per noise level.

    Three COBRAS instances are built with noise probabilities 0.05, 0.05 and
    0.10 (approximation order 3 to 8, certainty threshold 0.95) and handed to
    ``test_varying_amounts_of_noise`` under the base name ``new_ncobras``.
    The resulting tasks (all standard datasets, budget 200, 3 runs) are
    executed in batches of 200.
    """
    standard_datasets = Dataset.get_standard_dataset_names()
    run_count = 3

    # One clusterer per noise level used by test_varying_amounts_of_noise;
    # the first two share a noise probability of 0.05.
    per_level_noise_probability = (0.05, 0.05, 0.10)
    ncobras_clusterers = [
        COBRAS(
            noise_probability=probability,
            minimum_approximation_order=3,
            maximum_approximation_order=8,
            certainty_threshold=0.95,
        )
        for probability in per_level_noise_probability
    ]

    tasks = list(
        test_varying_amounts_of_noise(
            "new_ncobras",
            ncobras_clusterers,
            cobras_result_extractor,
            dataset_names=standard_datasets,
            query_budget=200,
            nb_of_runs=run_count,
        )
    )
    execute_list_of_clustering_tasks(tasks, tests_per_batch=200)