Ejemplo n.º 1
0
    def test_determine_k(self):
        """
        Check that Clusterer._determine_k returns the expected cluster count.

        Two CSV fixtures from the ``data`` directory next to this test
        module are loaded. For each one, the selected feature columns are
        handed to ``_determine_k`` together with 9 as the second argument
        (presumably the largest k to try -- confirm against Clusterer),
        and the returned value is compared with the expected k.
        """
        data_dir = os.path.join(os.path.dirname(__file__), "data")

        # Each entry: (fixture file name, feature columns, expected k).
        cases = [
            ("four_clusters.csv", ["x", "y"], 4),
            (
                "iris.csv",
                ["Sepal.Length", "Sepal.Width", "Petal.Length",
                 "Petal.Width"],
                2,
            ),
        ]

        for csv_name, columns, expected_k in cases:
            csv_path = os.path.join(data_dir, csv_name)
            features = pd.read_csv(csv_path)[columns].values

            # A fresh Clusterer is built per fixture, exactly as the
            # checks were written out one after the other before.
            instance = Clusterer(csv_path, "/dev/null", [])
            self.assertEqual(instance._determine_k(features, 9), expected_k)
Ejemplo n.º 2
0
    def test_determine_k(self):
        """
        Verify the k chosen by Clusterer._determine_k on two CSV fixtures.

        ``four_clusters.csv`` (columns ``x``, ``y``) is expected to give 4
        and ``iris.csv`` (the four sepal/petal columns) is expected to
        give 2. Both files are read from the ``data`` directory beside
        this test module.
        """
        here = os.path.dirname(__file__)

        def pick_k(file_name, feature_names):
            # Load the named fixture, keep only the requested columns and
            # return whatever _determine_k reports for them. The second
            # argument 9 is passed through unchanged (presumably the upper
            # bound on k -- confirm against Clusterer).
            path = os.path.join(here, "data", file_name)
            frame = pd.read_csv(path)
            matrix = frame[feature_names].values
            return Clusterer(path, "/dev/null", [])._determine_k(matrix, 9)

        self.assertEqual(pick_k("four_clusters.csv", ["x", "y"]), 4)

        iris_columns = [
            "Sepal.Length",
            "Sepal.Width",
            "Petal.Length",
            "Petal.Width",
        ]
        self.assertEqual(pick_k("iris.csv", iris_columns), 2)