"""Grid-search a decision tree (and set up kNN) on a 2-d nonlinear dataset."""
import sys
sys.path.append("E:/New Folder/utils")
import classification_utils as cutils
from sklearn import model_selection, ensemble, tree, neighbors
import xgboost as xgb

# 2-d classification pattern.
# NOTE: the original also generated a linear dataset here and immediately
# overwrote it with the nonlinear one below — that dead call is removed.
X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000, noise=0.1)
cutils.plot_data_2d_classification(X, y)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

# Grid search for decision-tree hyper-parameter values.
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))}
final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid, X_train, y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

# kNN grid — the source was truncated mid-dict; completed with the same
# grid used by the sibling kNN script in this file (n_neighbors + weights).
knn_estimator = neighbors.KNeighborsClassifier()
knn_grid = {
    'n_neighbors': list(range(1, 21)),
    'weights': ['uniform', 'distance'],
}
"""Grid-search a logistic regression on an imbalanced 2-d linear dataset."""
import sys
sys.path.append("E:/New Folder/utils")
import classification_utils as cutils
from sklearn import model_selection, linear_model

# Imbalanced (80/20) binary problem with 2 features.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_classes=2, weights=[0.8, 0.2], class_sep=1.0)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

# FIX: the default 'lbfgs' solver only supports l2; the grid searches over
# both l1 and l2, so use 'liblinear', which supports both penalties.
lr_estimator = linear_model.LogisticRegression(solver='liblinear')
lr_grid = {'penalty': ['l1', 'l2'],
           'C': [0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]}
# roc_auc scoring is appropriate for the imbalanced classes above.
final_estimator = cutils.grid_search_best_model(
    lr_estimator, lr_grid, X_train, y_train, scoring='roc_auc')
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)
final_estimator.predict_proba(X_test)
cutils.performance_metrics_soft_binary_classification(final_estimator, X_test, y_test)
"""Generate and visualize 2-d and 3-d synthetic classification datasets."""
import sys
sys.path.append('C:/Users/akhram/Desktop/AIML/Machine Learning/Utils')
import classification_utils as cutils
from sklearn import model_selection

# 2-d classification pattern (slightly imbalanced binary problem).
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=500, n_features=2, n_classes=2, weights=[0.4, 0.6], class_sep=1)
cutils.plot_data_2d_classification(X, y)

# 3-d, 3-class pattern — X now has 3 features.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=500, n_features=3, n_classes=3, weights=[0.4, 0.3, 0.3], class_sep=1)
cutils.plot_data_3d_classification(X, y)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
# FIX: X_train comes from the 3-feature dataset above, so it must be drawn
# with the 3-d plotter (the original called the 2-d plotter here).
cutils.plot_data_3d_classification(X_train, y_train)
# NOTE(review): this chunk begins mid-statement — the bare name below is
# almost certainly the tail of a truncated multi-line `from ... import`
# whose start lies outside this view. Left byte-identical.
grid_search_plot_models_classification
from regression_utils import generate_nonlinear_synthetic_data_regression, generate_nonlinear_synthetic_sine_data_regression, generate_linear_synthetic_data_regression, \
    plot_model_2d_regression, plot_model_3d_regression, plot_data_2d_regression, plot_data_3d_regression, \
    grid_search_plot_models_regression, plot_coefficients_regression, \
    plot_target_and_transformed_target_regression, rmse, regression_performance
from feature_reduction_utils import feature_reduction_linear_pca, feature_reduction_kernel_pca, \
    feature_reduction_tsne, feature_reduction_isomap
from kernel_utils import GaussianFeatures, KernelTransformer
from sklearn.model_selection import train_test_split
from sklearn import metrics, decomposition, manifold
from sklearn import tree, covariance, linear_model, ensemble, neighbors, svm, model_selection, feature_selection
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Compare dimensionality-reduction techniques on a 3-feature, 3-class dataset.
# NOTE(review): generate_linear_synthetic_data_classification, plot_data_3d and
# plot_data_2d are not imported by any visible line — presumably they come from
# the truncated import above; verify against the full file.
X, y = generate_linear_synthetic_data_classification(n_samples=1000, n_features=3, n_redundant=0, n_classes=3, weights=[.3,.3,.4])
plot_data_3d(X)
# Linear PCA projection to 2 components.
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
# Kernel PCA projection to 2 components.
X_kpca = feature_reduction_kernel_pca(X, 2)
plot_data_2d(X_kpca, new_window=True)
# t-SNE embedding to 2 dimensions.
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
# Isomap embedding to 2 dimensions.
X_isomap = feature_reduction_isomap(X, 2)
plot_data_2d(X_isomap, new_window=True)

# Repeat on a nonlinear 2-class dataset.
X, y = generate_nonlinear_synthetic_data_classification2(n_samples=1000)
plot_data_2d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
# NOTE(review): extra positional args ('rbf', 15) — presumably kernel name and
# a kernel parameter (e.g. gamma); confirm against feature_reduction_utils.
X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15)
# Visualize a 4-class 2-d dataset and set up a kNN grid search.
import sys
sys.path.append("E:/")
import classification_utils as cutils
from sklearn import model_selection, neighbors

# 2-d classification pattern: 4 classes with mild imbalance.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_classes=4, weights=[0.3, 0.3, 0.2, 0.2])
cutils.plot_data_2d_classification(X, y)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
# Overlay the test points (as 'x' markers) on the training-data axes.
ax = cutils.plot_data_2d_classification(X_train, y_train)
cutils.plot_data_2d_classification(X_test, y_test, ax, marker='x', s=70, legend=False)

# Grid search for kNN hyper-parameter values: neighborhood size and
# uniform vs. distance-weighted voting.
knn_estimator = neighbors.KNeighborsClassifier()
knn_grid = {
    'n_neighbors': list(range(1, 21)),
    'weights': ['uniform', 'distance']
}
# refit=True retrains the best configuration on the full training set.
# NOTE(review): the estimator is never fitted in this visible chunk —
# presumably .fit(X_train, y_train) follows outside this view.
knn_grid_estimator = model_selection.GridSearchCV(knn_estimator, knn_grid, cv=10, refit=True)
"""Inspect feature covariance/correlation with and without redundant features."""
import sys
sys.path.append("E:/")
import classification_utils as cutils
import numpy as np

# 10-feature balanced binary dataset, no redundant features.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=10, n_classes=2, weights=[0.5, 0.5])
# FIX: the original evaluated np.cov/np.corrcoef as bare expressions, which
# are no-ops in a script — print them so the matrices are actually shown.
# rowvar=False: rows are observations, columns are features.
print(np.cov(X, rowvar=False))
print(np.corrcoef(X, rowvar=False))

# Same dataset but with 4 redundant (linearly dependent) features — the
# correlation matrix should now show strong off-diagonal structure.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=10, n_classes=2, weights=[0.5, 0.5], n_redundant=4)
print(np.cov(X, rowvar=False))
print(np.corrcoef(X, rowvar=False))
# Explore the effect of PCA on 2-d classification data.
import sys
path = 'C:/Users/akhram/Desktop/AIML/Machine Learning/Utils'
sys.path.append(path)
import common_utils as utils
import pca_utils as putils
import tsne_utils as tutils
import classification_utils as cutils
import pandas as pd
import numpy as np
from sklearn import decomposition

# PCA effect on linearly related data: class_sep=0 makes the classes overlap,
# so the structure examined here is purely the feature covariance.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_redundant=0, n_classes=2,
    class_sep=0, weights=[.5, .5])
# Wrap in a DataFrame so .corr() and named columns are available.
X = pd.DataFrame(X, columns=['X1', 'X2'])
utils.plot_data_2d(X)
print(X.corr())

# n_components=0.96: keep enough components to explain 96% of the variance.
lpca = decomposition.PCA(n_components=0.96)
lpca.fit(X)
print(lpca.components_)
print(lpca.explained_variance_)
print(lpca.explained_variance_ratio_)
# Cumulative variance explained by successive components.
np.cumsum(lpca.explained_variance_ratio_)
putils.plot_pca_result(lpca, X)

# PCA effect on linearly related data (1 redundant feature)
# NOTE(review): section continues beyond this visible chunk.
"""Gallery of synthetic classification patterns, then load the titanic CSV."""
import sys
sys.path.append("E:/")
import classification_utils as cutils
from sklearn import model_selection
import numpy as np
import pandas as pd
import os

# 2-d classification patterns: one linear and three nonlinear generators.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_classes=4, weights=[0.3, 0.3, 0.2, 0.2])
cutils.plot_data_2d_classification(X, y)
X, y = cutils.generate_nonlinear_synthetic_data_classification1(
    n_samples=1000, n_features=2, n_classes=2)
cutils.plot_data_2d_classification(X, y)
X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000, noise=0.05)
cutils.plot_data_2d_classification(X, y)
X, y = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=1000, noise=0.05)
cutils.plot_data_2d_classification(X, y)

# 3-d classification patterns; np.repeat builds five equal 0.1 class weights.
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=3, n_classes=5, weights=list(np.repeat(0.1, 5)))
cutils.plot_data_3d_classification(X, y)
X, y = cutils.generate_nonlinear_synthetic_data_classification1(
    n_samples=1000, n_features=3, n_classes=2)
cutils.plot_data_3d_classification(X, y)

# FIX: renamed 'dir' -> 'data_dir'; the original shadowed the builtin dir().
data_dir = 'E:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))