@author: henry """ from sklearn.datasets import load_breast_cancer import pandas as pd import HappyML.preprocessor as pp from HappyML.regression import LogisticRegressor from HappyML.performance import ClassificationPerformance import HappyML.model_drawer as md dataset = load_breast_cancer() X = pd.DataFrame(dataset.data, columns=dataset.feature_names) Y = pd.DataFrame(dataset.target, columns=["isBreastCancer"]) selector = pp.KBestSelector() X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X) X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y) X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test)) regressor = LogisticRegressor() Y_pred = regressor.fit(X_train, Y_train).predict(X_test) pfm = ClassificationPerformance(Y_test, Y_pred) print("Confusion Matrix:\n", pfm.confusion_matrix()) print("Accuracy: {:.2%}".format(pfm.accuracy())) print("Recall: {:.2%}".format(pfm.recall())) print("Precision: {:.2%}".format(pfm.precision()))
# NOTE(review): the imports (pp, md, KMeansCluster, DecisionTree,
# KFoldClassificationPerformance) and the loading of `dataset` / `X` are not
# visible in this part of the file -- presumably they precede it; confirm.

# --- Scaling and dimensionality reduction -----------------------------------
X = pp.feature_scaling(X, X)

selector = pp.PCASelector(best_k=2)
fitted_pca = selector.fit(X)
X = fitted_pca.transform(X)

# --- Clustering -------------------------------------------------------------
cluster = KMeansCluster()
fitted_cluster = cluster.fit(X)
Y_pred = fitted_cluster.predict(X, "Customer Type")

md.cluster_drawer(
    X,
    Y_pred,
    cluster.centroids,
    "Customers Segmentation",
    "Microsoft JhengHei",
)

# --- Classification on the clustered data -----------------------------------
# The cluster predictions are combined with the dataset, then columns 1..17
# are taken as features and column 18 as the target.
dataset = pp.combine(dataset, Y_pred)
feature_columns = list(range(1, 18))
X, Y = pp.decomposition(dataset, feature_columns, [18])

selector = pp.KBestSelector()
fitted_kbest = selector.fit(X, Y)
X = fitted_kbest.transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

classifier = DecisionTree()
fitted_classifier = classifier.fit(X_train, Y_train)
Y_pred = fitted_classifier.predict(X_test)

# --- K-fold evaluation report -----------------------------------------------
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier)

print("----- Decision Tree Classification -----")

# Each metric is computed right before its line is printed, in this order.
for report_template, compute_metric in (
    ("10 Folds Mean Accuracy: {}", kfp.accuracy),
    ("10 Folds Mean Recall: {}", kfp.recall),
    ("10 Folds Mean Precision: {}", kfp.precision),
    ("10 Folds Mean F1_Score: {}", kfp.f_score),
):
    print(report_template.format(compute_metric()))
import HappyML.preprocessor as pp
from HappyML.classification import RandomForest
from HappyML.performance import KFoldClassificationPerformance
from random import randint
import HappyML.model_drawer as md
from IPython.display import Image, display

# --- Data loading -----------------------------------------------------------
dataset = pp.dataset("Zoo_Data.csv")
dataset_classname = pp.dataset("Zoo_Class_Name.csv")

# Collect the "Class_Type" value of every row, in row order.
class_names = [
    row["Class_Type"]
    for _, row in dataset_classname.iterrows()
]

# Columns 1..16 are taken as features and column 17 as the target.
feature_columns = list(range(1, 17))
X, Y = pp.decomposition(dataset, feature_columns, [17])

# --- Feature selection ------------------------------------------------------
selector = pp.KBestSelector(best_k="auto")
fitted_selector = selector.fit(X, Y, sort=False)
X = fitted_selector.transform(X)

# --- Training ---------------------------------------------------------------
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

classifier = RandomForest()
fitted_classifier = classifier.fit(X_train, Y_train)
Y_pred = fitted_classifier.predict(X_test)

# --- K-fold evaluation report -----------------------------------------------
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier)

# Fold count shown in the report lines.
# NOTE(review): presumably this matches the number of folds the performance
# helper actually uses -- it is not passed to it here; confirm.
fold_count = 10

print("Using KBest:")
print("----- Random Forest Classification -----")

# Each metric is computed right before its line is printed, in this order.
for report_template, compute_metric in (
    ("{} Folds Mean Accuracy: {}", kfp.accuracy),
    ("{} Folds Mean Recall: {}", kfp.recall),
    ("{} Folds Mean Precision: {}", kfp.precision),
    ("{} Folds Mean F1-Score: {}", kfp.f_score),
):
    print(report_template.format(fold_count, compute_metric()))