def simple_cobras_tests():
    """Template showing how to define, run and compare COBRAS tests.

    The strings in angle brackets are placeholders; replace them with real
    test and comparison names before using this as a starting point.
    """
    budget = 200
    cores = 4

    # A TestCollection gathers the test cases to run; add another
    # registration to it if you need a different testing scenario.
    collection = TestCollection()
    algorithm_settings = cobras_algorithm_settings_to_string(
        0.10, 3, 7, 0.91, 0.91, False, False)
    dataset_names = Dataset.get_dataset_names()
    # noise probability 0 --> no noise
    querier_settings = probabilistic_noisy_querier_settings_to_string(
        0, budget)
    collection.add_10_times_10_fold_test(
        "<TEST NAME>",
        "COBRAS",
        algorithm_settings,
        dataset_names,
        "probability_noise_querier",  # name of the querier
        querier_settings)

    # Run the tests locally on the requested number of cores.
    run_tests_local(collection, nb_of_cores=cores)

    # After running several collections like the one above, their results
    # can be compared as follows.
    comparison = "NAME OF THE COMPARISON"
    # Must match the first argument passed to add_10_times_10_fold_test.
    compared_tests = ["<TEST NAME>", "<OTHER TEST NAME>"]
    # Shown in the plot legend instead of the test names (test names have to
    # be unique and can therefore become very long).
    legend_labels = ["<simple name for <TEST NAME>>", "<OTHER SIMPLE NAME>"]

    # Calculate the ARIs and compare the tests. Per the original author's
    # note, this step is not run over SSH.
    calculate_aris_and_compare_for_tests(comparison,
                                         compared_tests,
                                         legend_labels,
                                         query_budget=budget,
                                         nb_of_cores=cores)
def cobras_minimal_vs_all_cycles_test():
    """Run and compare two COBRAS configurations differing in one flag.

    Both tests share every setting except the last positional argument of
    ``cobras_algorithm_settings_to_string`` (presumably the "use all cycles"
    switch, judging by the test names -- confirm against its signature).
    """
    print("making tests")

    # (test name, value of the last algorithm-settings argument)
    variants = [
        ("ncobras_minimal_cycles", False),
        ("ncobras_all_cycles", True),
    ]

    collection = TestCollection()
    for variant_name, last_flag in variants:
        collection.add_10_times_10_fold_test(
            variant_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, True,
                                                last_flag),
            Dataset.get_standard_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(0.10, 25))

    run_tests_over_SSH_on_machines(collection, nb_of_computers=5)

    # No custom legend labels: line_names is passed as None.
    calculate_aris_and_compare_for_tests(
        "ncobras_minimal_vs_all_cycles",
        [variant_name for variant_name, _ in variants],
        None)
def ncobras_noise_comparison_fixed_noise_changing_p_noise():
    """Run COBRAS with a fixed querier noise of 0.10 and varying settings.

    The querier settings are identical for every test (0.10, 250); only the
    first argument and the two threshold arguments handed to
    ``cobras_algorithm_settings_to_string`` change between tests.
    """
    print("making tests")

    # (test name, first algorithm argument, value used for both thresholds).
    # Names are literals because formatting the float 0.10 would give "0.1".
    configurations = [
        ("ncobras_0.10_noise_0.05_p_noise", 0.05, 0.96),
        ("ncobras_0.10_noise_0.10_p_noise", 0.10, 0.91),
        ("ncobras_0.10_noise_0.15_p_noise", 0.15, 0.91),
        ("ncobras_0.10_noise_0.20_p_noise", 0.20, 0.91),
    ]

    collection = TestCollection()
    for config_name, p_noise, threshold in configurations:
        collection.add_10_times_10_fold_test(
            config_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(p_noise, 3, 7, threshold,
                                                threshold, True, False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(0.10, 250))

    machines = generate_computer_info(start_index=21, nb_of_machines=5)
    run_tests_over_SSH_on_machines(collection, machines)

    # No custom legend labels: line_names is passed as None.
    calculate_aris_and_compare_for_tests(
        "ncobras_parameter_sensitivity",
        [config_name for config_name, _, _ in configurations],
        None)
def ncobras_plus_varying_amounts_of_noise():
    """Register and run NCOBRASplus tests for several querier noise levels.

    A noise level of -1 is labelled "no" in the test name. For that level the
    algorithm settings fall back to 0.10, because only positive levels are
    passed through to the algorithm unchanged. No comparison is made here;
    the tests are only run.
    """
    print("making tests")
    collection = TestCollection()
    budget = 200
    cutoff = 0.95  # used for both threshold arguments of the algorithm

    for querier_noise in [-1, 0.05, 0.10]:
        if querier_noise != -1:
            label = str(querier_noise)
        else:
            label = "no"

        # The algorithm always gets a positive value, even when the querier
        # is configured with -1.
        if querier_noise > 0:
            algorithm_noise = querier_noise
        else:
            algorithm_noise = 0.10

        full_name = "NCOBRASplus_{}_noise_budget{}_pnoise{}_threshold{}".format(
            label, budget, algorithm_noise, cutoff)
        collection.add_10_times_10_fold_test(
            full_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(algorithm_noise, 3, 10, cutoff,
                                                cutoff, True, False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(querier_noise,
                                                           budget),
            nb_of_runs=10)

    run_tests_over_SSH_on_machines(collection, MACHINES_TO_USE)
def cobras_parameter_comparison():
    """Sweep COBRAS (p, threshold) settings at querier noise 0.10 and compare.

    For every swept value of ``p`` and each threshold listed for it, one test
    is registered. All tests are run over SSH and then compared together with
    the pre-existing "cobras_no_noise" baseline, which is not registered here.

    Legend labels are built explicitly next to the test names. The earlier
    approach sliced them out of the test name with ``[12:-16]``, which gave an
    empty label for the shorter baseline name and only worked for three-digit
    query budgets.
    """
    print("making tests")
    query_budget = 200
    tests = TestCollection()

    # Thresholds to try for each value of p. The 0.05 entry is kept for
    # reference but is not part of the sweep below.
    thresholds_per_p = {
        0.05: [0.96, 0.99],
        0.10: [0.96, 0.99],
        0.15: [0.91, 0.96, 0.99],
        0.20: [0.86, 0.91, 0.96]
    }

    # The baseline gets a readable legend label instead of an empty string.
    test_names = ["cobras_no_noise"]
    line_names = ["no_noise"]

    for p in [0.10, 0.15, 0.20]:
        for t in thresholds_per_p[p]:
            test_name = "cobras_0.10_p{}_t{}_noise_budget{}".format(
                p, t, query_budget)
            test_names.append(test_name)
            # Same text the old slice produced for these names.
            line_names.append("p{}_t{}".format(p, t))
            tests.add_10_times_10_fold_test(
                test_name,
                "COBRAS",
                cobras_algorithm_settings_to_string(p, 3, 7, t, t, True,
                                                    False),
                Dataset.get_non_face_news_spam_names(),
                "probability_noise_querier",
                probabilistic_noisy_querier_settings_to_string(
                    0.10, query_budget),
                nb_of_runs=1)

    run_tests_over_SSH_on_machines(
        tests, himecs_generate_computer_info(start_index=3, nb_of_machines=2))

    comparison_name = "all_parameter_study"
    # recalculate=False is passed through unchanged from the original call.
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         nb_of_cores=24,
                                         query_budget=query_budget,
                                         recalculate=False)
def ncobras_plus_runtime_test():
    """Register and run a single NCOBRASplus test whose name ends in "_runtimes".

    The same noise value (0.05) is used for the algorithm settings and for
    the querier settings. No comparison is made here; the test is only run.
    """
    budget = 100
    noise_level = 0.05
    cutoff = 0.95  # used for both keep_threshold and reuse_threshold

    # The noise level appears twice in the name: once as the querier noise
    # and once as the pnoise value.
    full_name = (
        "NCOBRASplus_{}_noise_budget{}_pnoise{}_threshold{}_runtimes".format(
            noise_level, budget, noise_level, cutoff))

    collection = TestCollection()
    collection.add_10_times_10_fold_test(
        full_name,
        "COBRAS",
        cobras_algorithm_settings_to_string(noise_level,
                                            min_approx_order=3,
                                            max_approx_order=10,
                                            keep_threshold=cutoff,
                                            reuse_threshold=cutoff,
                                            correct_noise=True,
                                            use_all_cycles=False),
        Dataset.get_dataset_names(),
        "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(noise_level, budget),
        nb_of_runs=10)

    run_tests_over_SSH_on_machines(collection, MACHINES_TO_USE)
def cobras_VaryingAmountsOfNoise():
    """Run COBRAS at several querier noise levels and compare the results.

    The algorithm settings are the same for every test; only the noise level
    given to the querier changes. A level of -1 is labelled "no" in the test
    name.
    """
    print("making tests")
    collection = TestCollection()
    budget = 200

    registered_names = []
    for querier_noise in [-1, 0.05, 0.10, 0.20]:
        if querier_noise == -1:
            label = "no"
        else:
            label = str(querier_noise)

        full_name = "cobras_{}_noise_budget{}".format(label, budget)
        # Remember the name so the comparison below uses exactly the tests
        # registered here.
        registered_names.append(full_name)
        collection.add_10_times_10_fold_test(
            full_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, False,
                                                False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(
                querier_noise, budget))

    run_tests_over_SSH_on_machines(collection, MACHINES_TO_USE)

    # No custom legend labels: line_names is passed as None.
    calculate_aris_and_compare_for_tests("cobras_varying_amounts_of_noise",
                                         registered_names,
                                         None)
def synthetic_datasets_comparison():
    """Run COSC, MPCK-means and COBRAS on the synthetic datasets and compare.

    Each algorithm is registered twice: once with querier noise -1 (the
    "no_noise" tests) and once with querier noise 0.10. All six tests are
    run, but only the MPCK-means and COBRAS results are passed to the
    comparison; the COSC tests are currently left out of it.
    """
    print("making tests")
    tests = TestCollection()
    datasets = ["compound", "flame", "jain", "pathbased", "spiral"]

    # Settings shared by every test in this comparison.
    querier = "probability_noise_querier"
    query_budget = 100
    nb_of_runs = 3

    tests.add_10_times_10_fold_test(
        "COSC_synthetic_no_noise",
        "NPU_COSC",
        "no parameters",
        datasets,
        querier,
        probabilistic_noisy_querier_settings_to_string(-1, query_budget),
        nb_of_runs=nb_of_runs)
    tests.add_10_times_10_fold_test(
        "COSC_synthetic_0.10_noise",
        "NPU_COSC",
        "no parameters",
        datasets,
        querier,
        probabilistic_noisy_querier_settings_to_string(0.10, query_budget),
        nb_of_runs=nb_of_runs)

    tests.add_10_times_10_fold_test(
        "MPCK_means_synthetic_0.10_noise",
        "NPU_MPCKmeans",
        mpck_means_algorithm_settings_to_string(),
        datasets,
        querier,
        probabilistic_noisy_querier_settings_to_string(0.10, query_budget),
        nb_of_runs=nb_of_runs)
    tests.add_10_times_10_fold_test(
        "MPCK_means_synthetic_no_noise",
        "NPU_MPCKmeans",
        mpck_means_algorithm_settings_to_string(),
        datasets,
        querier,
        probabilistic_noisy_querier_settings_to_string(-1, query_budget),
        nb_of_runs=nb_of_runs)

    # The no-noise COBRAS run explicitly passes correct_noise=False; the
    # noisy run below leaves that argument at its default.
    tests.add_10_times_10_fold_test(
        "nCOBRAS_synthetic_no_noise",
        "COBRAS",
        cobras_algorithm_settings_to_string(0.10,
                                            min_approx_order=3,
                                            max_approx_order=5,
                                            keep_threshold=0.99,
                                            reuse_threshold=0.99,
                                            correct_noise=False),
        datasets,
        querier,
        probabilistic_noisy_querier_settings_to_string(-1, query_budget),
        nb_of_runs=nb_of_runs)
    tests.add_10_times_10_fold_test(
        "nCOBRAS_synthetic_0.10_noise",
        "COBRAS",
        cobras_algorithm_settings_to_string(0.10,
                                            min_approx_order=3,
                                            max_approx_order=5,
                                            keep_threshold=0.99,
                                            reuse_threshold=0.99),
        datasets,
        querier,
        probabilistic_noisy_querier_settings_to_string(0.10, query_budget),
        nb_of_runs=nb_of_runs)

    run_tests_over_SSH_on_machines(tests, himecs_generate_computer_info(2, 2))

    comparison_name = "MPCK_vs_cobras_synthetic"
    # NOTE: "COSC_synthetic_0.10_noise" and "COSC_synthetic_no_noise" are run
    # above but deliberately not listed here.
    test_names = [
        "MPCK_means_synthetic_0.10_noise",
        "MPCK_means_synthetic_no_noise",
        "nCOBRAS_synthetic_0.10_noise",
        "nCOBRAS_synthetic_no_noise",
    ]
    line_names = None
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         nb_of_cores=24,
                                         query_budget=query_budget)