コード例 #1
0
def simple_cobras_tests():
    """Template: register one COBRAS test batch, run it locally, compare results.

    The strings in angle brackets are placeholders that have to be replaced
    before the function is useful. The first half registers a single batch on
    every dataset returned by ``Dataset.get_dataset_names()`` and runs it on
    4 local cores; the second half compares the results of named batches.
    """
    # TestCollection holds the logic for building the different kinds of test
    # cases; a new testing scenario can be added to that class.
    collection = TestCollection()

    algorithm_settings = cobras_algorithm_settings_to_string(
        0.10, 3, 7, 0.91, 0.91, False, False)
    dataset_names = Dataset.get_dataset_names()
    # noise probability 0 --> no noise
    querier_settings = probabilistic_noisy_querier_settings_to_string(0, 200)
    collection.add_10_times_10_fold_test(
        "<TEST NAME>",
        "COBRAS",
        algorithm_settings,
        dataset_names,
        "probability_noise_querier",  # name of the querier
        querier_settings,
    )

    # Run the registered tests locally, spread over the given number of cores.
    run_tests_local(collection, nb_of_cores=4)

    # Once several batches like the one above have been run, their results
    # can be compared as follows.
    comparison_name = "NAME OF THE COMPARISON"
    # Each entry must equal the first argument that was passed to
    # add_10_times_10_fold_test for the batch it refers to.
    test_names = ["<TEST NAME>", "<OTHER TEST NAME>"]
    # Shown in the plot legends instead of the test names (test names have to
    # be unique and can therefore become very long).
    line_names = ["<simple name for <TEST NAME>>", "<OTHER SIMPLE NAME>"]
    # Calculate all ARIs and compare the tests.
    # NOTE(review): the original comment said this step is not possible over
    # SSH; the remainder of that comment was unreadable -- confirm.
    calculate_aris_and_compare_for_tests(
        comparison_name,
        test_names,
        line_names,
        query_budget=200,
        nb_of_cores=4,
    )
コード例 #2
0
def cobras_minimal_vs_all_cycles_test():
    """Run COBRAS twice, differing only in the last settings flag, and compare.

    Both batches use the standard datasets and a probabilistic noisy querier
    configured with (0.10, 25). They are executed over SSH and afterwards
    compared under the name ``ncobras_minimal_vs_all_cycles``.
    """
    print("making tests")

    collection = TestCollection()
    # Only the final boolean of the algorithm settings differs between the
    # two batches (presumably the "use all cycles" switch -- TODO confirm
    # against cobras_algorithm_settings_to_string).
    variants = (
        ("ncobras_minimal_cycles", False),
        ("ncobras_all_cycles", True),
    )
    for test_name, last_flag in variants:
        collection.add_10_times_10_fold_test(
            test_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, True,
                                                last_flag),
            Dataset.get_standard_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(0.10, 25),
        )

    run_tests_over_SSH_on_machines(collection, nb_of_computers=5)

    # No custom legend names: None is passed for line_names.
    calculate_aris_and_compare_for_tests(
        "ncobras_minimal_vs_all_cycles",
        ["ncobras_minimal_cycles", "ncobras_all_cycles"],
        None,
    )
コード例 #3
0
def ncobras_noise_comparison_fixed_noise_changing_p_noise():
    """Compare COBRAS runs whose first algorithm setting varies.

    The querier is the same for every batch (settings (0.10, 250)); only the
    first algorithm setting (0.05 .. 0.20) and, for the first batch, the two
    threshold values change. The batches are run over SSH on 5 machines
    starting at index 21 and compared as ``ncobras_parameter_sensitivity``.
    """
    print("making tests")
    collection = TestCollection()

    # (test name, first algorithm setting, value used for both thresholds)
    # Note that the 0.05 batch uses 0.96 while the others use 0.91.
    configurations = [
        ("ncobras_0.10_noise_0.05_p_noise", 0.05, 0.96),
        ("ncobras_0.10_noise_0.10_p_noise", 0.10, 0.91),
        ("ncobras_0.10_noise_0.15_p_noise", 0.15, 0.91),
        ("ncobras_0.10_noise_0.20_p_noise", 0.20, 0.91),
    ]
    for test_name, p_noise, threshold in configurations:
        collection.add_10_times_10_fold_test(
            test_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(p_noise, 3, 7, threshold,
                                                threshold, True, False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(0.10, 250),
        )

    machines = generate_computer_info(start_index=21, nb_of_machines=5)
    run_tests_over_SSH_on_machines(collection, machines)

    # Compare exactly the batches registered above, in registration order,
    # without custom legend names.
    registered_names = [name for name, _, _ in configurations]
    calculate_aris_and_compare_for_tests("ncobras_parameter_sensitivity",
                                         registered_names, None)
コード例 #4
0
def ncobras_plus_varying_amounts_of_noise():
    """Register and run COBRAS batches for several querier noise levels.

    One batch (10 runs, budget 200, thresholds 0.95) is registered per noise
    level in [-1, 0.05, 0.10] and the collection is run over SSH on
    ``MACHINES_TO_USE``. No comparison is made here.
    """
    print("making tests")
    collection = TestCollection()
    budget = 200
    cutoff = 0.95
    for querier_noise in [-1, 0.05, 0.10]:
        # -1 is labelled "no" in the test name.
        if querier_noise != -1:
            noise_label = str(querier_noise)
        else:
            noise_label = "no"

        # The algorithm is given the querier's noise level when that is
        # positive; otherwise it falls back to 0.10.
        if querier_noise > 0:
            algorithm_noise = querier_noise
        else:
            algorithm_noise = 0.10

        test_name = "NCOBRASplus_{}_noise_budget{}_pnoise{}_threshold{}".format(
            noise_label, budget, algorithm_noise, cutoff)
        collection.add_10_times_10_fold_test(
            test_name,
            "COBRAS",
            cobras_algorithm_settings_to_string(algorithm_noise, 3, 10,
                                                cutoff, cutoff, True, False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(querier_noise,
                                                           budget),
            nb_of_runs=10,
        )
    run_tests_over_SSH_on_machines(collection, MACHINES_TO_USE)
コード例 #5
0
def cobras_parameter_comparison():
    """Run COBRAS for several (p, threshold) settings and compare the results.

    For every p in [0.10, 0.15, 0.20] and every threshold listed for that p,
    one single-run batch is registered with a querier configured with
    (0.10, query_budget). The batches are run over SSH and then compared,
    together with the ``cobras_no_noise`` baseline, under the name
    ``all_parameter_study``.

    The baseline is only referenced in the comparison; it is not registered
    in this function.
    """
    print("making tests")
    query_budget = 200

    tests = TestCollection()
    # Test names and their legend labels are built side by side. Slicing the
    # label out of the test name (the previous approach) produced an empty
    # label for the baseline and depended on the budget having three digits.
    test_names = ["cobras_no_noise"]
    line_names = ["no_noise"]
    # Thresholds to try for each p. The 0.05 entry is defined but not
    # included in the loop below.
    test_dict = {
        0.05: [0.96, 0.99],
        0.10: [0.96, 0.99],
        0.15: [0.91, 0.96, 0.99],
        0.20: [0.86, 0.91, 0.96]
    }
    for p in [0.10, 0.15, 0.20]:
        for t in test_dict[p]:
            test_name = "cobras_0.10_p{}_t{}_noise_budget{}".format(
                p, t, query_budget)
            test_names.append(test_name)
            line_names.append("p{}_t{}".format(p, t))
            tests.add_10_times_10_fold_test(
                test_name,
                "COBRAS",
                # t is used for both threshold settings
                cobras_algorithm_settings_to_string(p, 3, 7, t, t, True,
                                                    False),
                Dataset.get_non_face_news_spam_names(),
                "probability_noise_querier",
                probabilistic_noisy_querier_settings_to_string(
                    0.10, query_budget),
                nb_of_runs=1)

    run_tests_over_SSH_on_machines(
        tests, himecs_generate_computer_info(start_index=3, nb_of_machines=2))
    comparison_name = "all_parameter_study"
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         nb_of_cores=24,
                                         query_budget=query_budget,
                                         recalculate=False)
コード例 #6
0
def ncobras_plus_runtime_test():
    """Register one COBRAS batch (noise 0.05, budget 100) and run it over SSH.

    The same noise value is given to the algorithm settings and to the
    querier settings. The batch consists of 10 runs on every dataset returned
    by ``Dataset.get_dataset_names()``; no comparison is made here.
    """
    collection = TestCollection()
    budget = 100
    noise_level = 0.05
    cutoff = 0.95  # used for both the keep and the reuse threshold

    # The noise level appears twice in the name: once as the querier noise
    # and once as the "pnoise" value.
    test_name = (
        "NCOBRASplus_{}_noise_budget{}_pnoise{}_threshold{}_runtimes".format(
            noise_level, budget, noise_level, cutoff))
    algorithm_settings = cobras_algorithm_settings_to_string(
        noise_level,
        min_approx_order=3,
        max_approx_order=10,
        keep_threshold=cutoff,
        reuse_threshold=cutoff,
        correct_noise=True,
        use_all_cycles=False,
    )
    dataset_names = Dataset.get_dataset_names()
    querier_settings = probabilistic_noisy_querier_settings_to_string(
        noise_level, budget)

    collection.add_10_times_10_fold_test(
        test_name,
        "COBRAS",
        algorithm_settings,
        dataset_names,
        "probability_noise_querier",
        querier_settings,
        nb_of_runs=10,
    )
    run_tests_over_SSH_on_machines(collection, MACHINES_TO_USE)
コード例 #7
0
def cobras_VaryingAmountsOfNoise():
    """Run COBRAS under several querier noise levels and compare the results.

    The algorithm settings are identical for every batch; only the querier's
    noise value changes over [-1, 0.05, 0.10, 0.20]. The batches are run over
    SSH on ``MACHINES_TO_USE`` and compared under the name
    ``cobras_varying_amounts_of_noise``.
    """
    print("making tests")
    collection = TestCollection()
    budget = 200
    for querier_noise in [-1, 0.05, 0.10, 0.20]:
        # -1 is labelled "no" in the test name.
        if querier_noise != -1:
            noise_label = str(querier_noise)
        else:
            noise_label = "no"
        collection.add_10_times_10_fold_test(
            "cobras_{}_noise_budget{}".format(noise_label, budget),
            "COBRAS",
            cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, False,
                                                False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(querier_noise,
                                                           budget),
        )
    run_tests_over_SSH_on_machines(collection, MACHINES_TO_USE)

    # These names have to match the ones generated in the loop above.
    names_to_compare = [
        "cobras_{}_noise_budget200".format(label)
        for label in ["no", 0.05, 0.10, 0.20]
    ]
    # No custom legend names: None is passed for line_names.
    calculate_aris_and_compare_for_tests("cobras_varying_amounts_of_noise",
                                         names_to_compare, None)
コード例 #8
0
def synthetic_datasets_comparison():
    """Run COSC, MPCKmeans and COBRAS on the synthetic datasets and compare.

    Each algorithm is registered twice: once with querier noise -1 (named
    ``*_no_noise``) and once with 0.10. Every batch uses 3 runs and a budget
    of 100. All six batches are run over SSH; only the MPCKmeans and COBRAS
    batches are included in the comparison ``MPCK_vs_cobras_synthetic``.
    """
    print("making tests")
    tests = TestCollection()
    datasets = ["compound", "flame", "jain", "pathbased", "spiral"]
    query_budget = 100

    def add_test(test_name, algorithm, algorithm_settings, noise):
        # Register one batch; querier, budget and run count are shared.
        tests.add_10_times_10_fold_test(
            test_name,
            algorithm,
            algorithm_settings,
            datasets,
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(
                noise, query_budget),
            nb_of_runs=3)

    add_test("COSC_synthetic_no_noise", "NPU_COSC", "no parameters", -1)
    add_test("COSC_synthetic_0.10_noise", "NPU_COSC", "no parameters", 0.10)
    add_test("MPCK_means_synthetic_0.10_noise", "NPU_MPCKmeans",
             mpck_means_algorithm_settings_to_string(), 0.10)
    add_test("MPCK_means_synthetic_no_noise", "NPU_MPCKmeans",
             mpck_means_algorithm_settings_to_string(), -1)
    # The no-noise COBRAS batch passes correct_noise=False explicitly ...
    add_test(
        "nCOBRAS_synthetic_no_noise", "COBRAS",
        cobras_algorithm_settings_to_string(0.10,
                                            min_approx_order=3,
                                            max_approx_order=5,
                                            keep_threshold=0.99,
                                            reuse_threshold=0.99,
                                            correct_noise=False), -1)
    # ... while the noisy batch leaves correct_noise at its default.
    add_test(
        "nCOBRAS_synthetic_0.10_noise", "COBRAS",
        cobras_algorithm_settings_to_string(0.10,
                                            min_approx_order=3,
                                            max_approx_order=5,
                                            keep_threshold=0.99,
                                            reuse_threshold=0.99), 0.10)

    run_tests_over_SSH_on_machines(tests, himecs_generate_computer_info(2, 2))
    comparison_name = "MPCK_vs_cobras_synthetic"
    test_names = [
        "MPCK_means_synthetic_0.10_noise", "MPCK_means_synthetic_no_noise",
        "nCOBRAS_synthetic_0.10_noise", "nCOBRAS_synthetic_no_noise"
    ]  # , "COSC_synthetic_0.10_noise", "COSC_synthetic_no_noise"]
    line_names = None
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         nb_of_cores=24,
                                         query_budget=query_budget)