# `sys` has to be imported before `sys.path` is touched; without this
# import the very first statement that uses it raises NameError.
import sys

# Append the local utilities directory to the module search path so the
# project-local imports that follow can be resolved.
path = 'J://utils'
sys.path.append(path)

from sklearn import cluster
import common_utils as utils
import clustering_utils as cl_utils
import classification_utils as cutils

# Generate and plot three synthetic 2-D datasets in turn. Each call rebinds
# X (the second return value is discarded into _), so only the last dataset
# is still bound to X once this section has run.

# 1) Clustered data: 300 samples, 4 centers, per-cluster std of 0.60.
X, _ = cl_utils.generate_synthetic_data_2d_clusters(
    n_samples=300, n_centers=4, cluster_std=0.60)
utils.plot_data_2d(X)

# 2) First non-linear dataset from the classification helpers.
X, _ = cutils.generate_nonlinear_synthetic_data_classification2(
    n_samples=300)
utils.plot_data_2d(X)

# 3) Second non-linear dataset; this is the X used by the code below.
X, _ = cutils.generate_nonlinear_synthetic_data_classification3(
    n_samples=300)
utils.plot_data_2d(X)

# Model selection for agglomerative clustering on the current X.
# `scoring` is a key understood by the project helper (presumably the
# silhouette score -- confirm in clustering_utils).
scoring = 's_score'
agg_estimator = cluster.AgglomerativeClustering()

# Candidate settings: three linkage strategies x cluster counts 2..6.
agg_grid = dict(
    linkage=['ward', 'complete', 'average'],
    n_clusters=[2, 3, 4, 5, 6],
)

agg_final_model = cl_utils.grid_search_best_model_clustering(
    agg_estimator, agg_grid, X, scoring=scoring)

# Plot the model returned by the search against the same data.
cl_utils.plot_model_2d_clustering(agg_final_model, X)
import sys

# Extend the module search path with the local utilities directory so the
# project-local imports that follow can be resolved.
path = 'J://utils'
sys.path += [path]

from sklearn import cluster, manifold
import common_utils as utils
import clustering_utils as cl_utils
import classification_utils as cutils

# Generate and plot two non-linear synthetic datasets. X is rebound by the
# second call, so everything below works on the second dataset.
X, _ = cutils.generate_nonlinear_synthetic_data_classification2(
    n_samples=300)
utils.plot_data_2d(X)

X, _ = cutils.generate_nonlinear_synthetic_data_classification3(
    n_samples=300)
utils.plot_data_2d(X)

# Embed X with t-SNE (default settings) and plot the embedding.
# NOTE(review): X_tsne is only plotted here; the clustering that follows
# is fitted on X, not on the embedding -- confirm that is intended.
tsne = manifold.TSNE()
X_tsne = tsne.fit_transform(X)
utils.plot_data_2d(X_tsne)

# Model selection for k-means on the current X.
# `scoring` is a key understood by the project helper (presumably the
# silhouette score -- confirm in clustering_utils).
scoring = 's_score'
kmeans_estimator = cluster.KMeans()

# Candidate cluster counts: 2..6.
kmeans_grid = dict(n_clusters=[2, 3, 4, 5, 6])

kmeans_final_model = cl_utils.grid_search_best_model_clustering(
    kmeans_estimator,
    kmeans_grid,
    X,
    scoring=scoring,
)

# Show the per-sample cluster assignments and the fitted centroids,
# then plot the selected model.
print(kmeans_final_model.labels_)
print(kmeans_final_model.cluster_centers_)
cl_utils.plot_model_2d_clustering(kmeans_final_model, X)
import sys

# Extend the module search path with the local utilities directory so the
# project-local imports that follow can be resolved.
# NOTE(review): this section uses drive E:, the earlier ones use J: --
# confirm which location is current.
path = 'E://utils'
sys.path += [path]

from sklearn import cluster, mixture
import common_utils as utils
import clustering_utils as cl_utils

# Clustered synthetic data: 300 samples, 4 centers, per-cluster std 0.60.
# The second return value is not needed and is discarded into _.
X, _ = cl_utils.generate_synthetic_data_2d_clusters(
    n_samples=300, n_centers=4, cluster_std=0.60)
utils.plot_data_2d(X)

# Model selection for a Gaussian mixture on the current X.
# `scoring` is a key understood by the project helper (presumably the
# silhouette score -- confirm in clustering_utils).
scoring = 's_score'
gmm_estimator = mixture.GaussianMixture(n_components=3)

# NOTE(review): the searched range (10..39 components) starts well above
# the 4 centers used to generate X -- confirm this range is intended.
gmm_grid = {'n_components': list(range(10, 40))}

# Baseline fit with 3 components; the predicted labels are not stored.
gmm_estimator.fit(X)
gmm_estimator.predict(X)

gmm_final_model = cl_utils.grid_search_best_model_clustering(gmm_estimator,
                                                             gmm_grid,
                                                             X,
                                                             scoring=scoring)

# Plot the model returned by the grid search, matching how the other
# clustering sections plot their *_final_model. Previously the initial
# gmm_estimator was plotted and gmm_final_model was never used.
cl_utils.plot_model_2d_clustering(gmm_final_model, X)