예제 #1
0
@author: henry
"""

from sklearn.datasets import load_breast_cancer
import pandas as pd
import HappyML.preprocessor as pp
from HappyML.regression import LogisticRegressor
from HappyML.performance import ClassificationPerformance
import HappyML.model_drawer as md

# --- Load data ---------------------------------------------------------
# Breast-cancer dataset shipped with scikit-learn.
dataset = load_breast_cancer()

# Features keep the dataset's own column names; the target is stored in a
# single-column DataFrame named "isBreastCancer".
X = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
Y = pd.DataFrame(data=dataset.target, columns=["isBreastCancer"])

# --- Feature selection -------------------------------------------------
# Fit the selector on (X, Y), then replace X with its transformed version.
# NOTE(review): the selection criterion and the default number of kept
# features live inside HappyML's KBestSelector -- confirm there.
selector = pp.KBestSelector()
fitted_selector = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True)
X = fitted_selector.transform(x_ary=X)

# --- Train / test split ------------------------------------------------
split_result = pp.split_train_test(x_ary=X, y_ary=Y)
X_train, X_test, Y_train, Y_test = split_result

# --- Feature scaling ---------------------------------------------------
# The scaler is fitted on the training features only and then applied to
# both the training and the test features.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train,
    transform_arys=(X_train, X_test),
)

# --- Model training and prediction -------------------------------------
regressor = LogisticRegressor()
fitted_regressor = regressor.fit(X_train, Y_train)
Y_pred = fitted_regressor.predict(X_test)

# --- Evaluation --------------------------------------------------------
# Compare the test-set labels with the predictions.
pfm = ClassificationPerformance(Y_test, Y_pred)

confusion = pfm.confusion_matrix()
print("Confusion Matrix:\n", confusion)

accuracy = pfm.accuracy()
print("Accuracy: {:.2%}".format(accuracy))

recall = pfm.recall()
print("Recall: {:.2%}".format(recall))

precision = pfm.precision()
print("Precision: {:.2%}".format(precision))
예제 #2
0
# NOTE(review): this example relies on `X`, `dataset`, and on the names
# KMeansCluster, DecisionTree and KFoldClassificationPerformance being
# defined/imported earlier; that preamble is not present in this file.

# --- Feature scaling ---------------------------------------------------
# Scale X using X itself as the fitting data.
X = pp.feature_scaling(X, X)

# --- Dimensionality reduction ------------------------------------------
# PCASelector configured with best_k=2; X is replaced by its output.
selector = pp.PCASelector(best_k=2)
fitted_selector = selector.fit(X)
X = fitted_selector.transform(X)

# --- Clustering --------------------------------------------------------
# The second predict() argument is the string "Customer Type"
# (presumably the name given to the prediction column -- confirm).
cluster = KMeansCluster()
fitted_cluster = cluster.fit(X)
Y_pred = fitted_cluster.predict(X, "Customer Type")

# Draw the clustered points together with the cluster centroids.
# "Microsoft JhengHei" is presumably a font name -- confirm.
md.cluster_drawer(
    X,
    Y_pred,
    cluster.centroids,
    "Customers Segmentation",
    "Microsoft JhengHei",
)

# --- Build a supervised dataset from the cluster result -----------------
# Combine the cluster predictions with the original dataset.
dataset = pp.combine(dataset, Y_pred)

# Columns 1..17 become X (column 0 is skipped); column 18 becomes Y.
# NOTE(review): column 18 is presumably the prediction column added by
# pp.combine above -- confirm.
feature_columns = list(range(1, 18))
label_columns = [18]
X, Y = pp.decomposition(dataset, feature_columns, label_columns)

# --- Feature selection -------------------------------------------------
selector = pp.KBestSelector()
fitted_selector = selector.fit(X, Y)
X = fitted_selector.transform(X)

# --- Train / test split ------------------------------------------------
split_result = pp.split_train_test(X, Y)
X_train, X_test, Y_train, Y_test = split_result

# --- Decision tree classification --------------------------------------
classifier = DecisionTree()
fitted_classifier = classifier.fit(X_train, Y_train)
Y_pred = fitted_classifier.predict(X_test)

# --- K-fold evaluation --------------------------------------------------
# Evaluated on the full (X, Y) with the wrapped underlying classifier.
# NOTE(review): the "10 Folds" labels below are hard-coded text; no fold
# count is passed here -- confirm it matches the library default.
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier)

print("----- Decision Tree Classification -----")

mean_accuracy = kfp.accuracy()
print("10 Folds Mean Accuracy: {}".format(mean_accuracy))

mean_recall = kfp.recall()
print("10 Folds Mean Recall: {}".format(mean_recall))

mean_precision = kfp.precision()
print("10 Folds Mean Precision: {}".format(mean_precision))

mean_f_score = kfp.f_score()
print("10 Folds Mean F1_Score: {}".format(mean_f_score))
예제 #3
0
import HappyML.preprocessor as pp
from HappyML.classification import RandomForest
from HappyML.performance import KFoldClassificationPerformance
from random import randint
import HappyML.model_drawer as md
from IPython.display import Image, display

# --- Load data ---------------------------------------------------------
dataset = pp.dataset("Zoo_Data.csv")
dataset_classname = pp.dataset("Zoo_Class_Name.csv")

# Collect the "Class_Type" value of every row in the class-name table.
class_names = [
    record["Class_Type"] for _, record in dataset_classname.iterrows()
]

# --- Split into features and label --------------------------------------
# Columns 1..16 become X (column 0 is skipped); column 17 becomes Y.
feature_columns = list(range(1, 17))
label_columns = [17]
X, Y = pp.decomposition(dataset, feature_columns, label_columns)

# --- Feature selection -------------------------------------------------
# best_k="auto" leaves the number of kept features to the selector.
selector = pp.KBestSelector(best_k="auto")
fitted_selector = selector.fit(X, Y, sort=False)
X = fitted_selector.transform(X)

# --- Train / test split ------------------------------------------------
split_result = pp.split_train_test(X, Y)
X_train, X_test, Y_train, Y_test = split_result

# --- Random forest classification --------------------------------------
classifier = RandomForest()
fitted_classifier = classifier.fit(X_train, Y_train)
Y_pred = fitted_classifier.predict(X_test)

# --- K-fold evaluation --------------------------------------------------
# Evaluated on the full (X, Y) with the wrapped underlying classifier.
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier)

# Fold count shown in the report labels.
# NOTE(review): this value is only printed; it is not passed to
# KFoldClassificationPerformance -- confirm it matches the library default.
reported_folds = 10

print("Using KBest:")
print("----- Random Forest Classification -----")
print("{} Folds Mean Accuracy: {}".format(reported_folds, kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(reported_folds, kfp.recall()))
print("{} Folds Mean Precision: {}".format(reported_folds, kfp.precision()))
print("{} Folds Mean F1-Score: {}".format(reported_folds, kfp.f_score()))