def test_runtimes_ncobras():
    # Noise-robust COBRAS (nCOBRAS) at 5% noise: 3 runs of 10-fold
    # cross-validation over the standard datasets, used to measure runtimes.
    clusterer = COBRAS(noise_probability=0.05, minimum_approximation_order=3, maximum_approximation_order=10,
                       certainty_threshold=0.95)
    querier = ProbabilisticNoisyQuerier(None, None, 0.05, 100)
    tasks = make_n_run_10_fold_cross_validation("ncobras_0.05noise_runtimes", clusterer, querier,
                                                Dataset.get_standard_dataset_names(), 3, cobras_result_extractor)
    execute_list_of_clustering_tasks(tasks, tests_per_batch=200)
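# The cobras_result_extractor passed above is not shown in these snippets. As a
# purely illustrative sketch of its role (assuming the COBRAS result object
# exposes `clusterings` and `runtimes` attributes, which is an assumption and
# not necessarily the repository's actual API), it could look like:
def cobras_result_extractor_sketch(cobras_result):
    # keep the intermediate clusterings and the runtime per answered query
    return cobras_result.clusterings, cobras_result.runtimes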
def NPU_MPCKMeans_VaryingAmountsOfNoise():
    # NPU with MPCKMeans at no, 5%, 10% and 20% noise, 200-query budget;
    # the resulting ARIs are compared across the noise levels.
    print("making tests")
    tests = TestCollection()
    query_budget = 200
    for noise_percentage in [-1, 0.05, 0.10, 0.20]:
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        tests.add_10_times_10_fold_test(
            "NPU_MPCKmeans_{}_noise_budget{}".format(noise_text, query_budget),
            "NPU_MPCKmeans", mpck_means_algorithm_settings_to_string(),
            Dataset.get_standard_dataset_names(), "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(
                noise_percentage, query_budget))
    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)
    comparison_name = "NPU_MPCKmeans_varying_amounts_of_noise"
    test_names = [
        "NPU_MPCKmeans_no_noise_budget200",
        "NPU_MPCKmeans_0.05_noise_budget200",
        "NPU_MPCKmeans_0.10_noise_budget200",
        "NPU_MPCKmeans_0.20_noise_budget200"
    ]
    line_names = [
        "NPU_MPCKmeans_no_noise", "NPU_MPCKmeans_0.05_noise",
        "NPU_MPCKmeans_0.10_noise", "NPU_MPCKmeans_0.20_noise"
    ]
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         query_budget=query_budget)
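# probabilistic_noisy_querier_settings_to_string is referenced above but not
# defined in these examples. A minimal sketch, assuming it merely serialises
# the querier parameters for the test runner (hypothetical, not the actual
# repository helper):
def probabilistic_noisy_querier_settings_to_string_sketch(noise_percentage, query_budget):
    # -1 is used throughout these examples as the "no noise" sentinel
    return f"{noise_percentage},{query_budget}"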
def test_varying_amounts_of_noise_cobras():
    test_collection = []
    # plain COBRAS
    clusterer = COBRAS(correct_noise=False)
    dataset_names = Dataset.get_standard_dataset_names()
    nb_of_runs = 3
    test_collection.extend(
        test_varying_amounts_of_noise("new_cobras", clusterer, cobras_result_extractor, dataset_names=dataset_names,
                                      query_budget=200, nb_of_runs=nb_of_runs))
    execute_list_of_clustering_tasks(test_collection, tests_per_batch=200)
def test_varying_amounts_of_noise_NPU_COSC():
    from generate_clusterings.algorithms.my_cosc import MyCOSCMatlab
    from generate_clusterings.algorithms.my_npu import NPU
    test_collection = []
    clusterer = NPU(MyCOSCMatlab(run_fast_version=True), debug=True)
    # dataset_names = ['hepatitis']
    dataset_names = Dataset.get_standard_dataset_names()
    dataset_names.remove("hepatitis")
    nb_of_runs = 3
    test_collection.extend(
        test_varying_amounts_of_noise("new_NPU_COSC_fast", clusterer, cosc_result_extractor, dataset_names=dataset_names,
                                      query_budget=200, nb_of_runs=nb_of_runs))
    execute_list_of_clustering_tasks(test_collection, tests_per_batch=200)
def cobras_minimal_vs_all_cycles_test():
    # Compare two nCOBRAS configurations that differ only in the final settings
    # flag (minimal cycles vs. all cycles), both at 10% noise with a 25-query budget.
    print("making tests")

    tests = TestCollection()
    tests.add_10_times_10_fold_test(
        "ncobras_minimal_cycles", "COBRAS",
        cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, True,
                                            False),
        Dataset.get_standard_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 25))
    tests.add_10_times_10_fold_test(
        "ncobras_all_cycles", "COBRAS",
        cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, True,
                                            True),
        Dataset.get_standard_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 25))
    run_tests_over_SSH_on_machines(tests, nb_of_computers=5)
    comparison_name = "ncobras_minimal_vs_all_cycles"
    test_names = ["ncobras_minimal_cycles", "ncobras_all_cycles"]
    line_names = None
    calculate_aris_and_compare_for_tests(comparison_name, test_names,
                                         line_names)
def test_varying_amounts_of_noise(test_name_raw, clusterers, result_extractor, query_budget=200, nb_of_runs=3,
                                  noise_percentages=(-1, 0.05, 0.1),
                                  dataset_names=None):
    # Build one batch of cross-validation tasks per noise level. A single
    # clusterer is replicated so there is one clusterer per noise level;
    # -1 is the sentinel for "no noise".
    if dataset_names is None:
        dataset_names = Dataset.get_standard_dataset_names()
    if not isinstance(clusterers, list):
        clusterers = [clusterers for _ in range(len(noise_percentages))]
    assert len(clusterers) == len(noise_percentages)
    test_collection = []
    for clusterer, noise_percentage in zip(clusterers, noise_percentages):
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        test_name = f"{test_name_raw}_{noise_text}_budget_{query_budget}"
        querier = ProbabilisticNoisyQuerier(None, None, noise_percentage, query_budget)
        additional_tasks = make_n_run_10_fold_cross_validation(test_name, clusterer, querier, dataset_names, nb_of_runs,
                                                               result_extractor)
        test_collection.extend(additional_tasks)
    return test_collection
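# Rough sketch of the task-generation pattern used above (hypothetical; the
# real make_n_run_10_fold_cross_validation is not shown in these snippets).
# The idea is to enumerate one clustering task per (dataset, run, fold)
# combination so that execute_list_of_clustering_tasks can run them in batches.
def make_n_run_10_fold_cross_validation_sketch(test_name, clusterer, querier,
                                               dataset_names, nb_of_runs,
                                               result_extractor, nb_of_folds=10):
    tasks = []
    for dataset_name in dataset_names:
        for run_idx in range(nb_of_runs):
            for fold_idx in range(nb_of_folds):
                tasks.append((test_name, clusterer, querier, dataset_name,
                              run_idx, fold_idx, result_extractor))
    return tasks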
def test_varying_amounts_of_noise_ncobras():
    test_collection = []
    dataset_names = Dataset.get_standard_dataset_names()
    nb_of_runs = 3

    # nCOBRAS
    clusterers = [COBRAS(noise_probability=0.05, minimum_approximation_order=3, maximum_approximation_order=8,
                         certainty_threshold=0.95),
                  COBRAS(noise_probability=0.05, minimum_approximation_order=3, maximum_approximation_order=8,
                         certainty_threshold=0.95),
                  COBRAS(noise_probability=0.10, minimum_approximation_order=3, maximum_approximation_order=8,
                         certainty_threshold=0.95)]
    test_collection.extend(
        test_varying_amounts_of_noise("new_ncobras", clusterers, cobras_result_extractor, dataset_names=dataset_names,
                                      query_budget=200, nb_of_runs=nb_of_runs))

    execute_list_of_clustering_tasks(test_collection, tests_per_batch=200)
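# Usage sketch (not part of the original snippets): each experiment above is a
# self-contained entry point, so a driver script could simply call the ones of
# interest, for example:
if __name__ == "__main__":
    test_varying_amounts_of_noise_cobras()
    test_varying_amounts_of_noise_ncobras()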