Example #1
import sys
sys.path.append("E:/New Folder/utils")

import classification_utils as cutils
from sklearn import model_selection, ensemble, tree, neighbors
import xgboost as xgb

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_classes=2,
                                                            weights=[0.5, 0.5],
                                                            class_sep=2)
#note: this second call overwrites the linear data generated above
X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000,
                                                                noise=0.1)
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

#grid search for parameter values
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))}
final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid, X_train,
                                                y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

knn_estimator = neighbors.KNeighborsClassifier()
#(grid completed by analogy with the decision-tree example above)
knn_grid = {'n_neighbors': list(range(1, 21))}
final_estimator = cutils.grid_search_best_model(knn_estimator, knn_grid, X_train,
                                                y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)
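#grid_search_best_model is a helper from classification_utils; a minimal sketch
#of what such a helper presumably does (the cv value is an assumption):
def grid_search_best_model_sketch(estimator, grid, X, y, cv=10):
    grid_estimator = model_selection.GridSearchCV(estimator, grid, cv=cv)
    grid_estimator.fit(X, y)
    print(grid_estimator.best_params_, grid_estimator.best_score_)
    return grid_estimator.best_estimator_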
Example #2
import sys
sys.path.append("E:/utils")

import classification_utils as cutils
from sklearn import preprocessing
import numpy as np

X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000,
                                                                noise=0.1)
#note: this second call overwrites the dataset generated above
X, y = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=1000,
                                                                noise=0.1)

cutils.plot_data_2d_classification(X, y)

#gaussian basis transformation
tmp = np.exp(-(X**2).sum(1))  #radial gaussian feature exp(-||x||^2)
X_3d = np.c_[X, tmp]
cutils.plot_data_3d_classification(
    X_3d,
    y,
    new_window=True,
    title="Linearly separable data in 3D with basis change")
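#a linear classifier on the lifted 3-d data shows why the basis change helps;
#LogisticRegression is an illustrative choice, not part of the original
from sklearn import linear_model
lifted_lr = linear_model.LogisticRegression().fit(X_3d, y)
print(lifted_lr.score(X_3d, y))  #close to 1.0 once the classes are separable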

#polynomial basis transformation
poly_features = preprocessing.PolynomialFeatures()  #degree=2 by default
X_poly1 = poly_features.fit_transform(X)

poly_features = preprocessing.PolynomialFeatures(degree=3)
X_poly2 = poly_features.fit_transform(X)
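#sanity check (illustrative): with 2 input features, the degree-2 expansion has
#6 monomials (1, x1, x2, x1^2, x1*x2, x2^2) and the degree-3 expansion has 10
print(X_poly1.shape)  #(1000, 6)
print(X_poly2.shape)  #(1000, 10)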
Example #3
#the header and setup of this example were lost; imports reconstructed from the
#calls below (utils is assumed to be the same helper package as in other examples)
import utils
import classification_utils as cutils
import numpy as np
from sklearn import datasets, decomposition, manifold, mixture, model_selection

digits = datasets.load_digits()

# project the 64-dimensional data to a lower dimension
pca = decomposition.PCA(n_components=30, whiten=False)
pca_digits = pca.fit_transform(digits.data)
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)
print(pca.components_)
print(pca_digits.shape)
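#a common way to pick n_components (a sketch, not in the original): keep the
#smallest k that explains, say, 95% of the variance
cum_var = np.cumsum(pca.explained_variance_ratio_)
print(np.argmax(cum_var >= 0.95) + 1)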
#naive visualization with only the first 2 PCs loses most of the variance
utils.plot_data_2d(pca_digits[:, 0:2])

#tsne on pca-reduced data
tsne = manifold.TSNE()
tsne_digits = tsne.fit_transform(pca_digits)
print(tsne.kl_divergence_)
print(tsne_digits.shape)
cutils.plot_data_2d_classification(tsne_digits, digits.target)
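#t-SNE is stochastic; fixing random_state makes the embedding reproducible
#(an illustrative re-run, not part of the original)
tsne_fixed = manifold.TSNE(random_state=1)
tsne_digits_fixed = tsne_fixed.fit_transform(pca_digits)
print(tsne_fixed.kl_divergence_)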

#using GMM
gmm_estimator = mixture.GaussianMixture()
gmm_params = {'n_components': np.arange(50, 200, 10)}
gmm_grid_estimator = model_selection.GridSearchCV(gmm_estimator, gmm_params)
gmm_grid_estimator.fit(pca_digits)
gmm_best_estimator = gmm_grid_estimator.best_estimator_
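#GridSearchCV ranks GaussianMixture models by score(), the average per-sample
#log-likelihood; BIC is a common alternative that also penalizes model size
#(a sketch with a coarser grid to keep it cheap):
bics = [mixture.GaussianMixture(n_components=k).fit(pca_digits).bic(pca_digits)
        for k in np.arange(50, 200, 50)]
print(bics)  #lower BIC is better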

pca_new_data = gmm_best_estimator.sample(1000)  #sample() returns (samples, labels)
print(pca_new_data[0].shape)
new_tsne_digits = tsne.fit_transform(pca_new_data[0])
utils.plot_data_2d(new_tsne_digits)

digits_new = pca.inverse_transform(pca_new_data[0])
print(digits_new.shape)
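#digits are 8x8 images, so the regenerated 64-dim samples can be viewed directly
#(matplotlib sketch, not in the original)
import matplotlib.pyplot as plt
fig, axes = plt.subplots(4, 4, figsize=(4, 4))
for img, ax in zip(digits_new, axes.ravel()):
    ax.imshow(img.reshape(8, 8), cmap='binary')
    ax.axis('off')
plt.show()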
Example #4
import sys
sys.path.append("E:/")

import classification_utils as cutils
from sklearn import model_selection, tree, neighbors
import pydot
import io
import os
import pandas as pd

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_classes=2,
                                                            weights=[0.5, 0.5])
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
ax = cutils.plot_data_2d_classification(X_train, y_train)
cutils.plot_data_2d_classification(X_test,
                                   y_test,
                                   ax,
                                   marker='x',
                                   s=70,
                                   legend=False)

#grid search for parameter values
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))}
dt_grid_estimator = model_selection.GridSearchCV(dt_estimator, dt_grid)
dt_grid_estimator.fit(X_train, y_train)
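#the pydot/io imports above suggest this example rendered the fitted tree;
#a sketch of that export (the output file name is illustrative)
dot_data = io.StringIO()
tree.export_graphviz(dt_grid_estimator.best_estimator_,
                     out_file=dot_data, filled=True, rounded=True)
(graph,) = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("dt.pdf")  #needs graphviz installed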
Example #5
import sys
'''
This module provides access to some variables used or maintained by the interpreter and to 
functions that interact strongly with the interpreter. It is always available.
'''
sys.path.append('C://Users//tauseef.ur.rahman//Desktop//MyPythonfiles')
#sys.path is the module search path; append() adds a new directory to it
import classification_utils as cutils
from sklearn import model_selection, metrics, neighbors
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_classes=4,
    weights=[0.25, 0.25, 0.25, 0.25])  #weights should sum to 1 for balanced classes
#make_classification returns X, an array of shape [n_samples, n_features] holding the features,
#and y, an array of shape [n_samples] with the integer class label of each sample
cutils.plot_data_2d_classification(X, y)

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
#model_selection.train_test_split splits arrays or matrices into random train and test subsets
#test_size - the proportion of the dataset to include in the test split
#random_state - if int, the seed used by the random number generator
cutils.plot_data_2d_classification(X_train, Y_train)
cutils.plot_data_2d_classification(X_test, Y_test)

knn_estimator = neighbors.KNeighborsClassifier()
knn_estimator.fit(X_train, Y_train)
cutils.plot_model_2d_classification(knn_estimator, X_train, Y_train)

y_pred = knn_estimator.predict(X_test)
print(metrics.accuracy_score(Y_test, y_pred))
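#beyond plain accuracy, a confusion matrix and per-class report give more detail
#(an illustrative addition)
print(metrics.confusion_matrix(Y_test, y_pred))
print(metrics.classification_report(Y_test, y_pred))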
'''
In multilabel classification, this function computes subset accuracy:
the set of labels predicted for a sample must exactly match the
corresponding set of labels in y_true.
'''
Example #6
#the start of this example was lost; the setup below is reconstructed from the
#code that follows, with the keras imports and hyperparameter values as assumptions
from keras.layers import Input, Dense
from keras.models import Model
from sklearn import manifold
import classification_utils as cutils

#X_train, X_test, y_train are assumed to be prepared earlier (setup lost)
input_size = X_train.shape[1]
encoding_size = 2   #assumed code size
epochs = 50         #assumed
batch_size = 128    #assumed

#linear pca
input = Input(shape=(input_size,))
encoded = Dense(encoding_size, activation='linear')(input)
decoded = Dense(input_size, activation='linear')(encoded)
autoencoder = Model(input, decoded)
print(autoencoder.summary())

autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.fit(X_train, X_train, epochs=epochs, batch_size=batch_size, shuffle=True, validation_split=0.2)

decoded_imgs = autoencoder.predict(X_test)
plot(20, X_test, decoded_imgs)  #plot: presumably a display helper from the utils module

encoder = Model(input, encoded)
tmp = encoder.predict(X_train)

tsne = manifold.TSNE()
tsne_data = tsne.fit_transform(tmp)
cutils.plot_data_2d_classification(tsne_data, y_train)

#non-linear pca
input = Input(shape=(input_size,))
encoded = Dense(encoding_size, activation='sigmoid')(input)
decoded = Dense(input_size, activation='linear')(encoded)
autoencoder = Model(input, decoded)
print(autoencoder.summary())

autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.fit(X_train, X_train, epochs=epochs, batch_size=batch_size, shuffle=True, validation_split=0.2)

decoded_imgs = autoencoder.predict(X_test)
plot(20, X_test, decoded_imgs)

encoder = Model(input, encoded)
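#aside (illustrative, not in the original): with linear activations and MSE loss
#an autoencoder learns the same subspace as PCA, up to rotation, so PCA is a
#cheap baseline for the learned codes
from sklearn import decomposition
pca = decomposition.PCA(n_components=encoding_size)
pca_codes = pca.fit_transform(X_train)
ae_codes = encoder.predict(X_train)
print(pca_codes.shape, ae_codes.shape)  #same code dimensionality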