# Demo: k-means clustering on nonlinear synthetic data, with a t-SNE view.
import sys

path = 'J://utils'
sys.path.append(path)

from sklearn import cluster, manifold

import common_utils as utils
import clustering_utils as cl_utils
import classification_utils as cutils

# Visualize two nonlinear synthetic datasets; note that X is rebound, so
# only the second dataset feeds the embedding and clustering steps below.
X, _ = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=300)
utils.plot_data_2d(X)

X, _ = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=300)
utils.plot_data_2d(X)

# Embed the (second) dataset in 2-D with t-SNE for visual inspection.
tsne = manifold.TSNE()
X_tsne = tsne.fit_transform(X)
utils.plot_data_2d(X_tsne)

# Grid-search k-means over cluster counts 2..6 using the project's
# 's_score' scorer (presumably silhouette — defined in clustering_utils).
scoring = 's_score'
kmeans_estimator = cluster.KMeans()
kmeans_grid = {'n_clusters': list(range(2, 7))}
kmeans_final_model = cl_utils.grid_search_best_model_clustering(
    kmeans_estimator, kmeans_grid, X, scoring=scoring)

# Inspect and plot the winning model.
print(kmeans_final_model.labels_)
print(kmeans_final_model.cluster_centers_)
cl_utils.plot_model_2d_clustering(kmeans_final_model, X)
# Digits demo: PCA compression, t-SNE visualization, and a GMM density fit.
# NOTE(review): this chunk relies on names imported elsewhere in the file
# (datasets, np, decomposition, manifold, mixture, model_selection,
# plot_digits, utils, cutils) — confirm the file-level imports cover them.

# load the data
digits = datasets.load_digits()
print(digits.data.shape)
plot_digits(digits.data, "Original Digits")
# NOTE(review): result is discarded — presumably exploratory; TODO confirm
np.corrcoef(digits.data)

# project the 64-dimensional data to a lower dimension
pca = decomposition.PCA(n_components=30, whiten=False)
pca_digits = pca.fit_transform(digits.data)
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)
print(pca.components_)
print(pca_digits.shape)

# incorrect visualization with only 2 pcs (first two components alone
# do not separate the classes well)
utils.plot_data_2d(pca_digits[:, 0:2])

# tsne on pca data: embed the 30-D PCA scores in 2-D, colored by digit label
tsne = manifold.TSNE()
tsne_digits = tsne.fit_transform(pca_digits)
print(tsne.kl_divergence_)
print(tsne_digits.shape)
cutils.plot_data_2d_classification(tsne_digits, digits.target)

# using GMM: grid-search the number of mixture components over 50..190;
# GridSearchCV's default scoring falls back to the estimator's own
# score() (per-sample average log-likelihood for GaussianMixture)
gmm_estimator = mixture.GaussianMixture()
gmm_params = {'n_components': np.arange(50, 200, 10)}
gmm_grid_estimator = model_selection.GridSearchCV(gmm_estimator, gmm_params)
gmm_grid_estimator.fit(pca_digits)
gmm_best_estimator = gmm_grid_estimator.best_estimator_
# Demo: k-means clustering on nonlinear synthetic data.
import sys

path = 'E://utils'
sys.path.append(path)

from sklearn import cluster

import common_utils as utils
import clustering_utils as cl_utils
import classification_utils as cutils

# Visualize two nonlinear synthetic datasets; X is rebound, so only the
# second dataset is clustered below.
X, _ = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=300)
utils.plot_data_2d(X)

X, _ = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=300)
utils.plot_data_2d(X)

# Grid-search k-means over cluster counts 2..6 using the project's
# 's_score' scorer (presumably silhouette — defined in clustering_utils).
scoring = 's_score'
kmeans_estimator = cluster.KMeans()
kmeans_grid = {'n_clusters': list(range(2, 7))}
kmeans_final_model = cl_utils.grid_search_best_model_clustering(
    kmeans_estimator, kmeans_grid, X, scoring=scoring)

# Inspect and plot the winning model.
print(kmeans_final_model.labels_)
print(kmeans_final_model.cluster_centers_)
cl_utils.plot_model_2d_clustering(kmeans_final_model, X)
# NOTE(review): this chunk begins MID-STATEMENT — the line below is the tail of
# a `from <module> import ...` continuation whose head (and module name) lies
# outside the visible source, so it cannot be reconstructed here; verify against
# the full file before reformatting.
# What the visible code does: imports feature-reduction helpers
# (feature_reduction_linear_pca / kernel_pca / tsne / isomap), kernel utilities,
# and sklearn/pandas/numpy/matplotlib; then (1) generates 3-feature linear
# synthetic classification data (n_samples=1000, 3 classes, weights .3/.3/.4),
# plots it in 3-D, reduces it to 2-D via linear PCA, kernel PCA, t-SNE, and
# Isomap, plotting each; (2) generates nonlinear synthetic data and repeats the
# reductions, this time with an RBF kernel PCA (gamma-like arg 15).
grid_search_plot_models_regression, plot_coefficients_regression, \ plot_target_and_transformed_target_regression, rmse, regression_performance from feature_reduction_utils import feature_reduction_linear_pca, feature_reduction_kernel_pca, \ feature_reduction_tsne, feature_reduction_isomap from kernel_utils import GaussianFeatures, KernelTransformer from sklearn.model_selection import train_test_split from sklearn import metrics, decomposition, manifold from sklearn import tree, covariance, linear_model, ensemble, neighbors, svm, model_selection, feature_selection import pandas as pd import numpy as np import matplotlib.pyplot as plt X, y = generate_linear_synthetic_data_classification(n_samples=1000, n_features=3, n_redundant=0, n_classes=3, weights=[.3,.3,.4]) plot_data_3d(X) X_lpca = feature_reduction_linear_pca(X, 2) plot_data_2d(X_lpca, new_window=True) X_kpca = feature_reduction_kernel_pca(X, 2) plot_data_2d(X_kpca, new_window=True) X_tsne = feature_reduction_tsne(X, 2) plot_data_2d(X_tsne, new_window=True) X_isomap = feature_reduction_isomap(X, 2) plot_data_2d(X_isomap, new_window=True) X, y = generate_nonlinear_synthetic_data_classification2(n_samples=1000) plot_data_2d(X) X_lpca = feature_reduction_linear_pca(X, 2) plot_data_2d(X_lpca, new_window=True) X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15) plot_data_2d(X_kpca, new_window=True) X_tsne = feature_reduction_tsne(X, 2) plot_data_2d(X_tsne, new_window=True)