def working_kernel_PCA_easy():
    """Run the Kernel PCA classification demo on Social_Network_Ads.csv.

    Drives an ``EasyClassi`` object through its read, prepare, Kernel PCA,
    fit, predict, evaluate and plot steps, in that order. Returns nothing.
    """
    easy = EasyClassi()
    easy.read("Social_Network_Ads.csv")

    # Data preparation: keep only columns 2 and 3 of X before splitting
    # and scaling.
    easy.explore()
    easy.split_X_y()
    easy.X = easy.X[:, 2:4]
    easy.split_ds()
    easy.scale_features(scaleY=False)

    # Applying Kernel PCA
    easy.applyKernelPCA()

    # Fit the classifier, then predict.
    easy.fitLog()
    predictions = easy.predict()

    # Evaluation: confusion matrix plus the helper's performance report.
    conf_matrix = easy.create_confusion_matrix()
    easy.printModelPerformance()

    # Plot using the helper's default data, labelling the axes with the two
    # kernel principal components. (An explicit training-set variant of this
    # call was left disabled.)
    easy.visualize_lineal_2D_class(x1="KPC1", x2="KPC2")
def working_class_dec_tree_easy():
    """Run the decision-tree classification demo on Social_Network_Ads.csv.

    Prepares the data with ``EasyClassi``, fits via ``fitDecTree()``, prints
    the confusion matrix and draws two 2D class plots. Returns nothing.
    """
    easy = EasyClassi()
    easy.read("Social_Network_Ads.csv")

    # Data preparation: restrict X to columns 2 and 3, hold out a test
    # fraction of 1 / 4, then scale.
    easy.explore()
    easy.split_X_y()
    easy.X = easy.X[:, 2:4]
    easy.split_ds(test_set=1 / 4)
    easy.scale_features(scaleY=False)

    # Fit and predict.
    easy.fitDecTree()
    predictions = easy.predict()

    # Evaluation: show the confusion matrix.
    conf_matrix = easy.create_confusion_matrix()
    print(conf_matrix)

    # First plot is given the training data explicitly; the second call
    # relies on the helper's defaults.
    easy.visualize_lineal_2D_class(easy.X_train, easy.y_train)
    easy.visualize_lineal_2D_class()
def working_k_fold_cross_easy():
    """Run the kernel-SVM demo followed by k-fold cross validation.

    Uses ``EasyClassi`` on Social_Network_Ads.csv: prepare, fit via
    ``fitKernelSVM()``, predict, build the confusion matrix, then apply and
    report k-fold cross validation. Returns nothing.
    """
    easy = EasyClassi()
    easy.read("Social_Network_Ads.csv")

    # Data preparation: restrict X to columns 2 and 3, hold out a test
    # fraction of 1 / 4, then scale.
    easy.explore()
    easy.split_X_y()
    easy.X = easy.X[:, 2:4]
    easy.split_ds(test_set=1 / 4)
    easy.scale_features(scaleY=False)

    # Fit and predict.
    easy.fitKernelSVM()
    predictions = easy.predict()

    # Evaluation: confusion matrix.
    conf_matrix = easy.create_confusion_matrix()

    # Applying k-fold cross validation and printing its performance.
    easy.apply_class_k_fold()
    easy.print_k_fold_perf()
def working_ANN():
    """Train and evaluate a small Keras network on Churn_Modelling.csv.

    Data is read and prepared through ``EasyClassi`` (columns from index 3
    onward, encoding via ``encode_and_dummy``, a 0.2 test split, feature
    scaling). A ``Sequential`` model with two 6-unit ReLU layers and one
    sigmoid output unit is fitted, test predictions are thresholded at 0.5,
    and the resulting confusion matrix is printed. Returns nothing.

    NOTE(review): the layer/fit keywords use the Keras 2+ names (``units``,
    ``kernel_initializer``, ``epochs``) in place of the Keras 1 names
    (``output_dim``, ``init``, ``nb_epoch``) -- confirm the project is not
    pinned to Keras 1.
    """
    from sklearn.metrics import confusion_matrix

    # Read data
    MLobj = EasyClassi()
    MLobj.read("Churn_Modelling.csv")

    # Prepare data: drop the first three columns of X.
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 3:]

    # Encode
    MLobj.encode_and_dummy([1, 2], [1], encode_y=False, removeFirstColumn=True)

    # Split test and training set
    MLobj.split_ds(test_set=0.2)

    # Scale features
    MLobj.scale_features()

    # Initialise the ANN
    classifier = Sequential()

    # Define the model. Each hidden layer has 6 units, chosen as
    # avg(#input nodes, #output nodes) = (11 + 1) / 2.
    classifier.add(
        Dense(units=6,
              kernel_initializer="uniform",
              activation="relu",
              input_dim=11))
    classifier.add(
        Dense(units=6, kernel_initializer="uniform", activation="relu"))
    classifier.add(
        Dense(units=1, kernel_initializer="uniform", activation="sigmoid"))

    # Compile ANN
    classifier.compile(optimizer="adam",
                       loss="binary_crossentropy",
                       metrics=["accuracy"])

    # Fit
    classifier.fit(MLobj.X_train, MLobj.y_train, batch_size=10, epochs=100)

    # Predict, then convert the sigmoid outputs to True/False at 0.5.
    y_pred = classifier.predict(MLobj.X_test)
    y_pred = (y_pred > 0.5)

    # Evaluate: report the confusion matrix instead of discarding it.
    cm = confusion_matrix(MLobj.y_test, y_pred)
    print(cm)
def working_grid_search():
    """Run the kernel-SVM demo, k-fold validation and an explicit grid search.

    Uses ``EasyClassi`` on Social_Network_Ads.csv to prepare data and fit via
    ``fitKernelSVM()``, applies and prints k-fold cross validation, then runs
    scikit-learn's ``GridSearchCV`` over linear and RBF parameter grids on
    ``MLobj.classifier`` and prints the best score and parameters found.
    Returns nothing.
    """
    from sklearn.model_selection import GridSearchCV

    # Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    # Prepare data: restrict X to columns 2 and 3, hold out a test fraction
    # of 1 / 4, then scale.
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    # Classification
    MLobj.fitKernelSVM()

    # Predict and build the confusion matrix. The return values are not
    # needed in this function, so they are not bound to locals.
    MLobj.predict()
    MLobj.create_confusion_matrix()

    # Applying K-Fold Cross validation
    MLobj.apply_class_k_fold()
    MLobj.print_k_fold_perf()

    # Apply grid search to find the best model and best parameters
    parameters = [{
        'C': [1, 10, 100, 1000],
        "kernel": ["linear"]
    }, {
        'C': [1, 10, 100, 1000],
        "kernel": ["rbf"],
        "gamma": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }]
    grid_search = GridSearchCV(estimator=MLobj.classifier,
                               param_grid=parameters,
                               scoring="accuracy",
                               cv=10,
                               n_jobs=-1)
    grid_search = grid_search.fit(MLobj.X_train, MLobj.y_train)

    # Report the outcome; previously these values were computed and dropped.
    best_accuracy = grid_search.best_score_
    best_parameters = grid_search.best_params_
    print("Best accuracy: {}".format(best_accuracy))
    print("Best parameters: {}".format(best_parameters))
def working_grid_search_easy():
    """Run the kernel-SVM demo, k-fold validation and helper-driven grid search.

    Same flow as the explicit grid-search demo, but the search itself is
    delegated to ``EasyClassi.apply_grid_search`` and reported with
    ``print_grid_search_perf``. Returns nothing.
    """
    easy = EasyClassi()
    easy.read("Social_Network_Ads.csv")

    # Data preparation: restrict X to columns 2 and 3, hold out a test
    # fraction of 1 / 4, then scale.
    easy.explore()
    easy.split_X_y()
    easy.X = easy.X[:, 2:4]
    easy.split_ds(test_set=1 / 4)
    easy.scale_features(scaleY=False)

    # Fit and predict.
    easy.fitKernelSVM()
    predictions = easy.predict()

    # Evaluation: confusion matrix.
    conf_matrix = easy.create_confusion_matrix()

    # Applying k-fold cross validation and printing its performance.
    easy.apply_class_k_fold()
    easy.print_k_fold_perf()

    # Candidate grids for the search: one for the linear kernel, one for
    # the RBF kernel (which additionally sweeps gamma).
    linear_grid = {
        'C': [1, 10, 100, 1000],
        "kernel": ["linear"],
    }
    rbf_grid = {
        'C': [1, 10, 100, 1000],
        "kernel": ["rbf"],
        "gamma": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    }
    search_space = [linear_grid, rbf_grid]

    easy.apply_grid_search(paramsGS=search_space)
    easy.print_grid_search_perf()
def working_class_logistic():
    """Fit and visualise a logistic regression on Social_Network_Ads.csv.

    Data is prepared through ``EasyClassi`` (columns 2 and 3 of X, a test
    fraction of 1 / 4, feature scaling with ``scaleY=False``). A scikit-learn
    ``LogisticRegression`` is fitted on the training split, the test
    predictions and confusion matrix are printed, and the decision regions
    are plotted for the training set and then the test set. Returns nothing.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix

    # Read data
    MLobj = EasyClassi()
    MLobj.read("Social_Network_Ads.csv")

    # Prepare data
    MLobj.explore()
    MLobj.split_X_y()
    MLobj.X = MLobj.X[:, 2:4]
    MLobj.split_ds(test_set=1 / 4)
    MLobj.scale_features(scaleY=False)

    # Classification with logistic regression
    classifier = LogisticRegression(random_state=0)
    classifier.fit(MLobj.X_train, MLobj.y_train)

    # Predict
    y_pred = classifier.predict(MLobj.X_test)
    print(y_pred)

    # Confusion matrix; printed so the evaluation is not silently discarded.
    cm = confusion_matrix(MLobj.y_test, y_pred)
    print(cm)

    # Visualise the training set results, then the test set results.
    _plot_logistic_decision_regions(classifier, MLobj.X_train, MLobj.y_train,
                                    'Logistic Regression (Training set)')
    _plot_logistic_decision_regions(classifier, MLobj.X_test, MLobj.y_test,
                                    'Logistic Regression (Test set)')


def _plot_logistic_decision_regions(classifier, X_set, y_set, title):
    """Draw the classifier's decision regions with the given points on top.

    Args:
        classifier: fitted estimator exposing ``predict`` on 2-column input.
        X_set: 2-column array of points to scatter (column 0 on the x axis,
            column 1 on the y axis).
        y_set: labels for ``X_set``; one scatter series is drawn per unique
            value.
        title: plot title.
    """
    from matplotlib.colors import ListedColormap

    class_colors = ListedColormap(('red', 'green'))

    # Dense grid spanning the data range padded by 1 on every side.
    X1, X2 = np.meshgrid(
        np.arange(start=X_set[:, 0].min() - 1,
                  stop=X_set[:, 0].max() + 1,
                  step=0.01),
        np.arange(start=X_set[:, 1].min() - 1,
                  stop=X_set[:, 1].max() + 1,
                  step=0.01))
    grid_points = np.array([X1.ravel(), X2.ravel()]).T
    plt.contourf(X1,
                 X2,
                 classifier.predict(grid_points).reshape(X1.shape),
                 alpha=0.75,
                 cmap=class_colors)
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())

    for i, j in enumerate(np.unique(y_set)):
        # ``color=`` rather than ``c=``: a single RGBA tuple passed as ``c``
        # can be mistaken for per-point values when a class has 4 points.
        plt.scatter(X_set[y_set == j, 0],
                    X_set[y_set == j, 1],
                    color=class_colors(i),
                    label=j)

    plt.title(title)
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show()