Esempio n. 1
0
def test_kmeans_when_k_is_2(dataset, expected1, expected2):
    """Check that the best of several k-means runs with k=2 matches the expected clusters.

    k-means is run once and then re-run 10 more times; the clustering with
    the lowest ``kmeans.cost_function`` value is kept and compared against
    the expected clusters.

    Args:
        dataset: Dataset file handed to ``kmeans.k_means`` and
            ``kmeans.get_list_from_dataset_file``.
        expected1: File holding the expected content of the first cluster.
        expected2: File holding the expected content of the second cluster.
    """
    expected_clustering1 = kmeans.get_list_from_dataset_file(expected1)
    expected_clustering2 = kmeans.get_list_from_dataset_file(expected2)

    clustering = kmeans.k_means(dataset_file=dataset, k=2)
    cost = kmeans.cost_function(clustering)

    for _ in range(10):
        new_clustering = kmeans.k_means(dataset_file=dataset, k=2)
        # Score the candidate run itself. Scoring the current best again
        # would always yield ``cost`` and the candidate could never win.
        new_cost = kmeans.cost_function(new_clustering)
        if new_cost < cost:
            clustering = new_clustering
            cost = new_cost

    assert len(clustering) == 2
    assert clustered_all_points(
        clustering, kmeans.get_list_from_dataset_file(dataset)) is True

    # The comparison is order-sensitive: clusters are taken in the iteration
    # order of the clustering mapping.
    clustered = [clustering[assignment] for assignment in clustering]
    assert clustered == [expected_clustering1, expected_clustering2]
Esempio n. 2
0
    def kmeans(self, args):
        """Run the kmeans command.

        Runs k-means on ``args.dataset`` once plus 100 further times, keeps
        the clustering with the lowest ``kmeans.cost_function`` value, and
        writes each cluster to its own CSV file named
        ``<dataset stem>_k_is_<k>_<assignment>.csv``.

        Args:
            args: Object exposing ``dataset`` (dataset file passed to
                ``kmeans.k_means``) and ``k`` (number of clusters; converted
                with ``int`` for the algorithm and interpolated into the
                output file names).
        """
        import csv

        k = int(args.k)
        clustering = kmeans.k_means(dataset_file=args.dataset, k=k)
        cost = kmeans.cost_function(clustering)

        for _ in range(100):
            new_clustering = kmeans.k_means(dataset_file=args.dataset, k=k)
            # Score the candidate run itself. Scoring the current best again
            # would always yield ``cost`` and the candidate could never win.
            new_cost = kmeans.cost_function(new_clustering)
            if new_cost < cost:
                clustering = new_clustering
                cost = new_cost

        # NOTE(review): everything after the first "." is dropped, so a path
        # such as "./data/set.csv" yields an empty stem - confirm callers only
        # pass plain file names before relying on this.
        stem = str(args.dataset).split(".")[0]
        for assignment in clustering:
            file_name = f"{stem}_k_is_{args.k}_{assignment}.csv"
            # newline="" lets the csv writer control line endings itself
            # (avoids blank rows on platforms that translate "\n").
            with open(file_name, "w", newline="") as f:
                writer = csv.writer(f)
                print("assignement ", assignment, " is: ", clustering[assignment])
                writer.writerows(clustering[assignment])
Esempio n. 3
0
def test_kmeans_when_k_is_3(dataset, expected1, expected2, expected3):
    """Check that the best of many k-means runs with k=3 matches the expected clusters.

    k-means is run once and then re-run 3000 more times; the clustering with
    the lowest ``kmeans.cost_function`` value is kept and compared against
    the expected clusters.

    Args:
        dataset: Dataset file handed to ``kmeans.k_means`` and
            ``kmeans.get_list_from_dataset_file``.
        expected1: File holding the expected content of the first cluster.
        expected2: File holding the expected content of the second cluster.
        expected3: File holding the expected content of the third cluster.
    """
    expected_clustering1 = kmeans.get_list_from_dataset_file(expected1)
    expected_clustering2 = kmeans.get_list_from_dataset_file(expected2)
    expected_clustering3 = kmeans.get_list_from_dataset_file(expected3)

    clustering = kmeans.k_means(dataset_file=dataset, k=3)
    cost = kmeans.cost_function(clustering)

    for _ in range(3000):
        new_clustering = kmeans.k_means(dataset_file=dataset, k=3)
        # Score the candidate run itself. Scoring the current best again
        # would always yield ``cost`` and the candidate could never win.
        new_cost = kmeans.cost_function(new_clustering)
        if new_cost < cost:
            clustering = new_clustering
            cost = new_cost

    assert len(clustering) == 3
    assert clustered_all_points(
        clustering, kmeans.get_list_from_dataset_file(dataset)) is True

    # The comparison is order-sensitive: clusters are taken in the iteration
    # order of the clustering mapping.
    clustered = [clustering[assignment] for assignment in clustering]
    assert clustered == [
        expected_clustering1, expected_clustering2, expected_clustering3
    ]


# a = "/Users/AaronLee/clustering/tests/test_files/dataset_1.csv"
# b = "/Users/AaronLee/clustering/tests/test_files/dataset_1_k_is_3_0.csv"
# c = "/Users/AaronLee/clustering/tests/test_files/dataset_1_k_is_3_1.csv"
# d = "/Users/AaronLee/clustering/tests/test_files/dataset_1_k_is_3_2.csv"
# x = test_kmeans_when_k_is_3(a,b,c,d)
#
# a1 = "/Users/AaronLee/clustering/tests/test_files/dataset_1.csv"
# b1 = "/Users/AaronLee/clustering/tests/test_files/dataset_1_k_is_2_0.csv"
# c1 = "/Users/AaronLee/clustering/tests/test_files/dataset_1_k_is_2_1.csv"
# x1 = test_kmeans_when_k_is_2(a1,b1,c1)
#
# a2 = "/Users/AaronLee/clustering/tests/test_files/dataset_1.csv"
# x2 = test_kmeans_when_k_is_1(a2)