import sys sys.path.append("E:/New Folder/utils") import classification_utils as cutils from sklearn import model_selection, ensemble, tree, neighbors import xgboost as xgb #2-d classification pattern X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000, n_features=2, n_classes=2, weights=[0.5, 0.5], class_sep=2) X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000, noise=0.1) cutils.plot_data_2d_classification(X, y) X_train, X_test, y_train, y_test = model_selection.train_test_split( X, y, test_size=0.2, random_state=1) cutils.plot_data_2d_classification(X_train, y_train) #grid search for parameter values dt_estimator = tree.DecisionTreeClassifier() dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))} final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid, X_train, y_train) cutils.plot_model_2d_classification(final_estimator, X_train, y_train) knn_estimator = neighbors.KNeighborsClassifier() knn_grid = { 'n_neighbors': list(range(1, 21)),
import sys sys.path.append("E:/utils") import classification_utils as cutils from sklearn import preprocessing import numpy as np X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000, noise=0.1) X, y = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=1000, noise=0.1) cutils.plot_data_2d_classification(X, y) #guassian basis transformation tmp = np.exp(-(X**2).sum(1)) X_3d = np.c_[X, tmp] cutils.plot_data_3d_classification( X_3d, y, new_window=True, title="Linearly separable data in 3D with basis change") #polynomial basis transformation poly_features = preprocessing.PolynomialFeatures() X_poly1 = poly_features.fit_transform(X) poly_features = preprocessing.PolynomialFeatures(degree=3) X_poly2 = poly_features.fit_transform(X)
import numpy as np
import utils
import classification_utils as cutils
from sklearn import datasets, decomposition, manifold, mixture, model_selection

digits = datasets.load_digits()

# project the 64-dimensional digits data to a lower dimension
pca = decomposition.PCA(n_components=30, whiten=False)
pca_digits = pca.fit_transform(digits.data)
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)
print(pca.components_)
print(pca_digits.shape)

#incorrect visualization with only 2 pcs
utils.plot_data_2d(pca_digits[:, 0:2])

#tsne on pca data
tsne = manifold.TSNE()
tsne_digits = tsne.fit_transform(pca_digits)
print(tsne.kl_divergence_)
print(tsne_digits.shape)
cutils.plot_data_2d_classification(tsne_digits, digits.target)

#using GMM to model the digits in PCA space and sample new points
gmm_estimator = mixture.GaussianMixture()
gmm_params = {'n_components': np.arange(50, 200, 10)}
gmm_grid_estimator = model_selection.GridSearchCV(gmm_estimator, gmm_params)
gmm_grid_estimator.fit(pca_digits)
gmm_best_estimator = gmm_grid_estimator.best_estimator_

pca_new_data = gmm_best_estimator.sample(1000)   #returns (samples, component labels)
print(pca_new_data[0].shape)
new_tsne_digits = tsne.fit_transform(pca_new_data[0])
utils.plot_data_2d(new_tsne_digits)

digits_new = pca.inverse_transform(pca_new_data[0])
print(digits_new.shape)
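#Illustrative sketch (an addition): the sampled points were mapped back to the original
#64-dimensional space by pca.inverse_transform, so each row of digits_new can be viewed
#as an 8x8 image. This assumes digits is the sklearn load_digits dataset; only matplotlib
#is used here.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(4, 10, figsize=(10, 4))
for ax, img in zip(axes.ravel(), digits_new[:40]):
    ax.imshow(img.reshape(8, 8), cmap='gray_r')
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()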
import sys sys.path.append("E:/") import classification_utils as cutils from sklearn import model_selection, tree, neighbors import pydot import io import os import pandas as pd #2-d classification pattern X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000, n_features=2, n_classes=2, weights=[0.5, 0.5]) cutils.plot_data_2d_classification(X, y) X_train, X_test, y_train, y_test = model_selection.train_test_split( X, y, test_size=0.2, random_state=1) ax = cutils.plot_data_2d_classification(X_train, y_train) cutils.plot_data_2d_classification(X_test, y_test, ax, marker='x', s=70, legend=False) #grid search for parameter values dt_estimator = tree.DecisionTreeClassifier() dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))} dt_grid_estimator = model_selection.GridSearchCV(dt_estimator,
import sys
'''
The sys module provides access to some variables used or maintained by the interpreter
and to functions that interact strongly with the interpreter. It is always available.
'''
sys.path.append('C://Users//tauseef.ur.rahman//Desktop//MyPythonfiles')
#sys.path is the module search path; append() adds a new directory to it

import classification_utils as cutils
from sklearn import model_selection, metrics, neighbors

X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_classes=4,
                                                            weights=[0.3, 0.3, 0.3, 0.3])
#make_classification returns X: array of shape [n_samples, n_features] with the feature values,
#and y: array of shape [n_samples] with the integer class label of each sample
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y,
                                                                    test_size=0.2,
                                                                    random_state=1)
#model_selection.train_test_split splits arrays or matrices into random train and test subsets
#test_size is the proportion of the dataset to include in the test split
#random_state, if an int, is the seed used by the random number generator
cutils.plot_data_2d_classification(X_train, y_train)
cutils.plot_data_2d_classification(X_test, y_test)

knn_estimator = neighbors.KNeighborsClassifier()
knn_estimator.fit(X_train, y_train)
cutils.plot_model_2d_classification(knn_estimator, X_train, y_train)

y_pred = knn_estimator.predict(X_test)
metrics.accuracy_score(y_test, y_pred)
'''
In multilabel classification, accuracy_score computes subset accuracy: the set of labels
predicted for a sample must exactly match the corresponding set of labels in y_true.
'''
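#Illustrative sketch (an addition): accuracy alone hides per-class behaviour in a 4-class
#problem, so a confusion matrix and a per-class report are worth printing as well. Uses
#only the metrics module already imported above.
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))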
#decoder on top of the encoder layers defined earlier in the script
decoded = Dense(input_size, activation='linear')(encoded)
autoencoder = Model(input, decoded)
print(autoencoder.summary())
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.fit(X_train, X_train,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                validation_split=0.2)
decoded_imgs = autoencoder.predict(X_test)
plot(20, X_test, decoded_imgs)

encoder = Model(input, encoded)
tmp = encoder.predict(X_train)
tsne = manifold.TSNE()
tsne_data = tsne.fit_transform(tmp)
cutils.plot_data_2d_classification(tsne_data, y_train)

#non-linear pca: sigmoid activation in the encoder
input = Input(shape=(input_size,))
encoded = Dense(encoding_size, activation='sigmoid')(input)
decoded = Dense(input_size, activation='linear')(encoded)
autoencoder = Model(input, decoded)
print(autoencoder.summary())
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.fit(X_train, X_train,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                validation_split=0.2)
decoded_imgs = autoencoder.predict(X_test)
plot(20, X_test, decoded_imgs)

encoder = Model(input, encoded)
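#Illustrative sketch (an addition): a simple way to judge the non-linear autoencoder built
#above is its reconstruction error on the test set. This assumes the model was compiled
#with mean_squared_error as shown; Model.evaluate is a standard Keras call.
test_mse = autoencoder.evaluate(X_test, X_test, verbose=0)
print("test reconstruction MSE:", test_mse)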