Example #1
import sys
sys.path.append("E:/New Folder/utils")

import classification_utils as cutils
from sklearn import model_selection, ensemble, tree, neighbors
import xgboost as xgb

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_classes=2,
                                                            weights=[0.5, 0.5],
                                                            class_sep=2)
# note: the call below overwrites the linear dataset generated above
X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000,
                                                                noise=0.1)
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

#grid search for parameter values
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))}
final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid, X_train,
                                                y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)
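# cutils.grid_search_best_model is a local helper whose source is not shown here;
# assuming it wraps sklearn's GridSearchCV, a minimal equivalent sketch:
dt_grid_search = model_selection.GridSearchCV(dt_estimator, dt_grid, cv=10, refit=True)
dt_grid_search.fit(X_train, y_train)
print(dt_grid_search.best_params_, dt_grid_search.best_score_)  # best settings and CV score
best_dt = dt_grid_search.best_estimator_  # refit on the full training set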

knn_estimator = neighbors.KNeighborsClassifier()
knn_grid = {
    'n_neighbors': list(range(1, 21)),
    'weights': ['uniform', 'distance']
}

Example #2

import sys
sys.path.append("E:/New Folder/utils")

import classification_utils as cutils
from sklearn import model_selection, linear_model

X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_classes=2,
                                                            weights=[0.8, 0.2],
                                                            class_sep=1.0)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

lr_estimator = linear_model.LogisticRegression(solver='liblinear')  # liblinear supports both the l1 and l2 penalties in the grid
lr_grid = {'penalty': ['l1', 'l2'], 'C': [0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]}
final_estimator = cutils.grid_search_best_model(lr_estimator,
                                                lr_grid,
                                                X_train,
                                                y_train,
                                                scoring='roc_auc')
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

print(final_estimator.predict_proba(X_test))  # class-membership probabilities
cutils.performance_metrics_soft_binary_classification(final_estimator, X_test,
                                                      y_test)
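# performance_metrics_soft_binary_classification is a local helper; assuming it
# reports probability-based metrics, similar numbers come straight from
# sklearn.metrics (a sketch, not the helper's actual code):
from sklearn import metrics
probs = final_estimator.predict_proba(X_test)[:, 1]  # predicted P(class 1)
print(metrics.roc_auc_score(y_test, probs))          # area under the ROC curve
print(metrics.log_loss(y_test, probs))               # cross-entropy of the probabilities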

Example #3

import sys
sys.path.append('C:/Users/akhram/Desktop/AIML/Machine Learning/Utils')

import classification_utils as cutils
from sklearn import model_selection

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=500,
                                                            n_features=2,
                                                            n_classes=2,
                                                            weights=[0.4, 0.6],
                                                            class_sep=1)
cutils.plot_data_2d_classification(X, y)

X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=500,
    n_features=3,
    n_classes=3,
    weights=[0.4, 0.3, 0.3],
    class_sep=1)
cutils.plot_data_3d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_3d_classification(X_train, y_train)  # the regenerated data has 3 features
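# plot_data_3d_classification is a local helper; a minimal matplotlib sketch of
# the same idea (3-d scatter coloured by class label):
import matplotlib.pyplot as plt
fig = plt.figure()
ax3d = fig.add_subplot(projection='3d')
ax3d.scatter(X_train[:, 0], X_train[:, 1], X_train[:, 2], c=y_train)
plt.show()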

Example #4

from classification_utils import generate_linear_synthetic_data_classification, \
    generate_nonlinear_synthetic_data_classification2, \
    grid_search_plot_models_classification
from common_utils import plot_data_2d, plot_data_3d
from regression_utils import generate_nonlinear_synthetic_data_regression, generate_nonlinear_synthetic_sine_data_regression, generate_linear_synthetic_data_regression, \
    plot_model_2d_regression, plot_model_3d_regression, plot_data_2d_regression, plot_data_3d_regression, \
    grid_search_plot_models_regression, plot_coefficients_regression, \
    plot_target_and_transformed_target_regression, rmse, regression_performance
from feature_reduction_utils import feature_reduction_linear_pca, feature_reduction_kernel_pca, \
    feature_reduction_tsne, feature_reduction_isomap
from kernel_utils import GaussianFeatures, KernelTransformer
from sklearn.model_selection import train_test_split
from sklearn import metrics, decomposition, manifold
from sklearn import tree, covariance, linear_model, ensemble, neighbors, svm, model_selection, feature_selection
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

X, y = generate_linear_synthetic_data_classification(n_samples=1000, n_features=3, n_redundant=0, n_classes=3, weights=[.3,.3,.4])
plot_data_3d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2)
plot_data_2d(X_kpca, new_window=True)
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
X_isomap = feature_reduction_isomap(X, 2)
plot_data_2d(X_isomap, new_window=True)

X, y = generate_nonlinear_synthetic_data_classification2(n_samples=1000)
plot_data_2d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15)
plot_data_2d(X_kpca, new_window=True)
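# feature_reduction_kernel_pca is a local wrapper; assuming its extra arguments
# are the kernel name and gamma, the direct sklearn call would be:
kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_kpca_direct = kpca.fit_transform(X)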

Example #5

import sys
sys.path.append("E:/")

import classification_utils as cutils
from sklearn import model_selection, neighbors

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_classes=4, weights=[0.3, 0.3, 0.2, 0.2])
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
ax = cutils.plot_data_2d_classification(X_train, y_train)
cutils.plot_data_2d_classification(X_test,
                                   y_test,
                                   ax,
                                   marker='x',
                                   s=70,
                                   legend=False)

#grid search for parameter values
knn_estimator = neighbors.KNeighborsClassifier()
knn_grid = {
    'n_neighbors': list(range(1, 21)),
    'weights': ['uniform', 'distance']
}
knn_grid_estimator = model_selection.GridSearchCV(knn_estimator,
                                                  knn_grid,
                                                  cv=10,
                                                  refit=True)
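# the snippet stops before fitting; a typical continuation would be:
knn_grid_estimator.fit(X_train, y_train)
print(knn_grid_estimator.best_params_)           # e.g. chosen k and weighting scheme
print(knn_grid_estimator.score(X_test, y_test))  # accuracy of the refit best model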
Example #6
import sys
sys.path.append("E:/")

import classification_utils as cutils
import numpy as np

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=10,
                                                            n_classes=2,
                                                            weights=[0.5, 0.5])

print(np.cov(X, rowvar=False))       # covariance between feature columns
print(np.corrcoef(X, rowvar=False))  # pairwise Pearson correlations

X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=10,
                                                            n_classes=2,
                                                            weights=[0.5, 0.5],
                                                            n_redundant=4)

print(np.cov(X, rowvar=False))       # covariance between feature columns
print(np.corrcoef(X, rowvar=False))  # redundant features inflate the off-diagonal entries
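# with n_redundant=4, four columns are linear combinations of the informative
# ones, which tends to show up as large off-diagonal correlations; one way to
# flag strongly correlated feature pairs:
corr = np.corrcoef(X, rowvar=False)
pairs = np.argwhere(np.triu(np.abs(corr) > 0.9, k=1))
print(pairs)  # index pairs of highly correlated features (the 0.9 threshold is arbitrary)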
Example #7
import sys

path = 'C:/Users/akhram/Desktop/AIML/Machine Learning/Utils'
sys.path.append(path)
import common_utils as utils
import pca_utils as putils
import tsne_utils as tutils
import classification_utils as cutils
import pandas as pd
import numpy as np
from sklearn import decomposition

#pca effect on linearly related data
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_redundant=0,
                                                            n_classes=2,
                                                            class_sep=0,
                                                            weights=[.5, .5])
X = pd.DataFrame(X, columns=['X1', 'X2'])
utils.plot_data_2d(X)
print(X.corr())

lpca = decomposition.PCA(n_components=0.96)  # keep enough components to explain 96% of the variance
lpca.fit(X)
print(lpca.components_)
print(lpca.explained_variance_)
print(lpca.explained_variance_ratio_)
print(np.cumsum(lpca.explained_variance_ratio_))  # cumulative variance explained
putils.plot_pca_result(lpca, X)
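# projecting onto the retained components and mapping back gives the usual PCA
# round trip; a short sketch of the reconstruction error:
X_proj = lpca.transform(X)               # coordinates in component space
X_back = lpca.inverse_transform(X_proj)  # back-projection into feature space
print(np.mean((X.values - X_back) ** 2)) # mean squared reconstruction error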

#pca effect on linearly related data (1 redundant feature)

Example #8

import sys
sys.path.append("E:/")

import classification_utils as cutils
from sklearn import model_selection
import numpy as np
import pandas as pd
import os

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000, n_features=2, n_classes=4, weights=[0.3,0.3,0.2,0.2])
cutils.plot_data_2d_classification(X, y)

X, y = cutils.generate_nonlinear_synthetic_data_classification1(n_samples=1000, n_features=2, n_classes=2)
cutils.plot_data_2d_classification(X, y)

X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000, noise=0.05)
cutils.plot_data_2d_classification(X, y)

X, y = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=1000, noise=0.05)
cutils.plot_data_2d_classification(X, y)

#3-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000, n_features=3, n_classes=5, weights=list(np.repeat(0.1,5)))
cutils.plot_data_3d_classification(X, y)

X, y = cutils.generate_nonlinear_synthetic_data_classification1(n_samples=1000, n_features=3, n_classes=2)
cutils.plot_data_3d_classification(X, y)

data_dir = 'E:/'  # avoid shadowing the built-in dir()
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
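# a quick first look at the freshly loaded frame (plain pandas, nothing cutils-specific):
print(titanic_train.shape)
titanic_train.info()  # column dtypes and non-null counts
print(titanic_train.head())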