def test_determine_k(self):
    """
    Check the value returned by Clusterer._determine_k on two fixtures.

    Both fixtures are CSV files in the "data" directory that sits next to
    this test module:

    * four_clusters.csv -- columns "x" and "y"; expected result is 4.
    * iris.csv -- the four sepal/petal measurement columns; expected
      result is 2.

    In each case _determine_k is called with the feature array and 9.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")

    # --- four_clusters.csv -------------------------------------------
    four_clusters_path = os.path.join(data_dir, "four_clusters.csv")
    four_clusters_frame = pd.read_csv(four_clusters_path)
    four_clusters_features = four_clusters_frame[["x", "y"]].values
    four_clusters_clusterer = Clusterer(four_clusters_path, "/dev/null", [])
    self.assertEqual(
        four_clusters_clusterer._determine_k(four_clusters_features, 9),
        4,
    )

    # --- iris.csv ----------------------------------------------------
    iris_path = os.path.join(data_dir, "iris.csv")
    iris_frame = pd.read_csv(iris_path)
    iris_columns = [
        "Sepal.Length",
        "Sepal.Width",
        "Petal.Length",
        "Petal.Width",
    ]
    iris_features = iris_frame[iris_columns].values
    iris_clusterer = Clusterer(iris_path, "/dev/null", [])
    self.assertEqual(iris_clusterer._determine_k(iris_features, 9), 2)
def test_determine_k(self):
    """
    Verify Clusterer._determine_k against the bundled CSV fixtures.

    Each case names a CSV file under the "data" directory beside this
    module, the columns used as features, and the value _determine_k is
    expected to return when called with those features and 9.
    Cases run in order and the first mismatch fails the test.
    """
    here = os.path.dirname(__file__)

    # (fixture file name, feature columns, expected result)
    cases = (
        ("four_clusters.csv", ["x", "y"], 4),
        (
            "iris.csv",
            ["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"],
            2,
        ),
    )

    for csv_name, columns, expected_k in cases:
        csv_path = os.path.join(here, "data", csv_name)
        features = pd.read_csv(csv_path)[columns].values
        # A fresh Clusterer is built per fixture, as in the original flow.
        subject = Clusterer(csv_path, "/dev/null", [])
        found_k = subject._determine_k(features, 9)
        self.assertEqual(found_k, expected_k)