def test_kmeans_silhouette_groupby1(self): predict_out = kmeans_silhouette_train_predict(table=self.iris, input_cols=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], n_clusters_list=[3, 2], prediction_col='predict', init='random', n_init=3, max_iter=300, tol=0.0001, precompute_distances=False, seed=12345, n_jobs=1, algorithm='full', n_samples=2, group_by=['species']) table = predict_out['out_table'].values.tolist() self.assertListEqual(table[0], [5.1, 3.5, 1.4, 0.2, 'setosa', 1]) self.assertListEqual(table[1], [4.9, 3.0, 1.4, 0.2, 'setosa', 0]) self.assertListEqual(table[2], [4.7, 3.2, 1.3, 0.2, 'setosa', 0]) self.assertListEqual(table[3], [4.6, 3.1, 1.5, 0.2, 'setosa', 0]) self.assertListEqual(table[4], [5.0, 3.6, 1.4, 0.2, 'setosa', 1])
def test_kmeans_silhouette_groupby1(self): df = load_iris() train_out = kmeans_silhouette_train_predict(df, input_cols=[ 'sepal_length', 'sepal_width', 'petal_length', 'petal_width' ], group_by=['species']) predict_out = kmeans_predict(df, train_out['model'])
def test_kmeans_silhouette_groupby2(self): predict_out = kmeans_silhouette_train_predict(table=self.iris, input_cols=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], n_clusters_list=[3, 2], prediction_col='predict', init='random', n_init=3, max_iter=300, tol=0.0001, precompute_distances=False, seed=12345, n_jobs=1, algorithm='full', n_samples=2, group_by=['species']) table = predict_out['model']['_grouped_data']['data']['setosa']['best_centers'].tolist() self.assertAlmostEqual(table[0][0], 4.71304348, 8) self.assertAlmostEqual(table[0][1], 3.12173913, 8) self.assertAlmostEqual(table[0][2], 1.4173913, 7) self.assertAlmostEqual(table[0][3], 0.19130435, 8) self.assertAlmostEqual(table[1][0], 5.25555556, 8) self.assertAlmostEqual(table[1][1], 3.67037037, 8) self.assertAlmostEqual(table[1][2], 1.5037037, 7) self.assertAlmostEqual(table[1][3], 0.28888889, 8)
def test_kmeans_silhouette2(self): predict_out = kmeans_silhouette_train_predict(table=self.iris, input_cols=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], n_clusters_list=[3, 2], prediction_col='predict', init='k-means++', n_init=3, max_iter=300, tol=0.0001, precompute_distances=True, seed=12345, n_jobs=1, algorithm='auto', n_samples=2, group_by=None) table = predict_out['model']['best_centers'] self.assertAlmostEqual(table[0].tolist()[0], 5.00566038, 8) self.assertAlmostEqual(table[0].tolist()[1], 3.36037736, 8) self.assertAlmostEqual(table[0].tolist()[2], 1.56226415, 8) self.assertAlmostEqual(table[0].tolist()[3], 0.28867925, 8) self.assertAlmostEqual(table[1].tolist()[0], 6.30103093, 8) self.assertAlmostEqual(table[1].tolist()[1], 2.88659794, 8) self.assertAlmostEqual(table[1].tolist()[2], 4.95876289, 8) self.assertAlmostEqual(table[1].tolist()[3], 1.69587629, 8)
def kmeans_silhouette_groupby1(self): df = get_iris() train_out = kmeans_silhouette_train_predict(df, input_cols=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], group_by=['species']) predict_out = kmeans_predict(df, train_out['model']) print(predict_out['out_table'])