Example #1
0
# -*- coding: utf-8 -*-
"""
Created on Tue Sep  7 09:06:38 2021

@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.clustering import KMeansCluster
import HappyML.model_drawer as md
from HappyML.classification import DecisionTree
from HappyML.performance import KFoldClassificationPerformance
from IPython.display import Image, display

# Load the data set and pass it through HappyML's missing-data handling.
dataset = pp.missing_data(pp.dataset(file="CreditCards.csv"))

# Use columns 1..17 as features; column 0 is left out.
X = pp.decomposition(dataset, x_columns=list(range(1, 18)))

# Feature scaling, fitted on and applied to the same data.
X = pp.feature_scaling(fit_ary=X, transform_arys=X)

# Feature selection with PCASelector (best_k=2).
selector = pp.PCASelector(best_k=2)
X = selector.fit(x_ary=X).transform(x_ary=X)

# K-Means clustering; "Customer Type" is the second argument of predict().
cluster = KMeansCluster()
Y_pred = cluster.fit(X).predict(X, "Customer Type")

md.cluster_drawer(
    X,
    Y_pred,
    cluster.centroids,
    "Customers Segmentation",
    "Microsoft JhengHei",
)
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 23 20:11:45 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# Column 0 is the dependent variable; columns 1..22 are the features.
dataset = pp.dataset("Mushrooms.csv")
X, Y = pp.decomposition(dataset, x_columns=list(range(1, 23)), y_columns=[0])

# One-hot encode all 22 feature columns (remove_trap=True) and label-encode
# the target, also requesting the label mapping.
X = pp.onehot_encoder(X, columns=list(range(22)), remove_trap=True)
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

from HappyML.preprocessor import KBestSelector

# Feature selection with KBestSelector (best_k="auto").
selector = KBestSelector(best_k="auto")
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(X)

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

# In[]
from HappyML.classification import DecisionTree

# Fit on the training set, then predict the test set.
classifier = DecisionTree()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)
Example #3
0
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 29 05:49:05 2021

@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.classification import DecisionTree
from HappyML.performance import KFoldClassificationPerformance
import HappyML.model_drawer as md
from IPython.display import Image, display

dataset = pp.dataset(file="HR-Employee-Attrition.csv")

# Column 1 is the dependent variable; every other column in 0..34 is a feature.
X, Y = pp.decomposition(dataset, x_columns=[0, *range(2, 35)], y_columns=[1])

# One-hot encode the listed feature columns and label-encode the target.
X = pp.onehot_encoder(X, [1, 3, 6, 10, 14, 16, 20, 21], True)
Y, Y_mapping = pp.label_encoder(Y, True)

# Feature selection with a default-configured KBestSelector.
selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(x_ary=X)

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# Fit on the training set, then predict the test set.
classifier = DecisionTree()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold performance object built from the full X / Y and the wrapped model.
K = 10
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("----- Decision Tree Classification -----")
Example #4
0
# -*- coding: utf-8 -*-
"""
Created on Mon Aug  2 20:53:59 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# NOTE(review): machine-specific absolute path; the script only runs where
# this exact location exists.
dataset = pp.dataset(
    file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/Position_Salaries.csv"
)

# Column 1 is the independent variable, column 2 the dependent variable.
X, Y = pp.decomposition(dataset, [1], [2])

# Split Training / Testing Set (the regressor below is fitted on the full
# X / Y, not on this split).
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    X, Y, train_size=0.8
)

# In[]
from HappyML.regression import SimpleRegressor
import HappyML.model_drawer as md

# Fit and predict on the complete data set.
reg_simple = SimpleRegressor()
Y_simple = reg_simple.fit(X, Y).predict(x_test=X)

# Draw the samples against the model output, then report the score.
md.sample_model(sample_data=(X, Y), model_data=(X, Y_simple))
print(
    "R-Squared of Simple Regression:",
    reg_simple.r_score(X, Y),
)
Example #5
0
# -*- coding: utf-8 -*-
"""
Created on Sun Aug  1 11:40:29 2021

@author: henry
"""

import ast

from HappyML import preprocessor as pp
from HappyML.regression import SimpleRegressor
import pandas as pd
from HappyML import model_drawer as md

# Two data sets: one for height, one for weight.
dataset_h = pp.dataset("Student_Height.csv")
dataset_w = pp.dataset("Student_Weight.csv")

# Column 1 is the independent variable; columns 3 and 4 are the two
# dependent variables of each data set.
X_h, Y_h = pp.decomposition(dataset_h, [1], [3, 4])
X_w, Y_w = pp.decomposition(dataset_w, [1], [3, 4])

# Split Training / Testing Set
X_h_train, X_h_test, Y_h_train, Y_h_test = pp.split_train_test(X_h, Y_h)
X_w_train, X_w_test, Y_w_train, Y_w_test = pp.split_train_test(X_w, Y_w)

# regressor[d][c]: d = 0 for the height data, 1 for the weight data;
# c = index of the dependent-variable column the model is fitted on.
regressor = [[SimpleRegressor(), SimpleRegressor()],
             [SimpleRegressor(), SimpleRegressor()]]
regressor[0][0].fit(X_h_train, Y_h_train.iloc[:, 0].to_frame())
regressor[0][1].fit(X_h_train, Y_h_train.iloc[:, 1].to_frame())
regressor[1][0].fit(X_w_train, Y_w_train.iloc[:, 0].to_frame())
regressor[1][1].fit(X_w_train, Y_w_train.iloc[:, 1].to_frame())

print("台灣 6~15 歲學童身高、體重評估系統\n")
# SECURITY: these prompts previously used eval(), which executes any Python
# expression the user types.  ast.literal_eval only accepts literals, so
# numeric input keeps its int/float type while arbitrary code is rejected
# with an exception (ValueError or SyntaxError).
gender = ast.literal_eval(input("請輸入您的性別(1.男 2.女):").strip()) - 1
age = ast.literal_eval(input("請輸入您的年齡(6-15):").strip())
Example #6
0
# -*- coding: utf-8 -*-
"""
Created on Mon Aug  2 18:25:36 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# NOTE(review): machine-specific absolute path.
dataset = pp.dataset(file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/50_Startups.csv")

# Columns 0..3 are the features, column 4 is the dependent variable.
X, Y = pp.decomposition(dataset, x_columns=[0, 1, 2, 3], y_columns=[4])

# One-hot encode column 3 with remove_trap=True.  Two-step alternative:
#   X = pp.onehot_encoder(X, columns=[3])
#   X = pp.remove_columns(X, [3])
X = pp.onehot_encoder(X, columns=[3], remove_trap=True)

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X, y_ary=Y, train_size=0.8
)

# Feature scaling: each scaler is fitted on the training part only and then
# applied to both parts.
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))
Y_train, Y_test = pp.feature_scaling(Y_train, (Y_train, Y_test))

# In[]
from HappyML.regression import SimpleRegressor

simple_reg = SimpleRegressor()
Y_pred_simple = simple_reg.fit(
    x_train=X_train, y_train=Y_train
).predict(x_test=X_test)

# R-squared never decreases as regressors are added in multiple linear
# regression, so adjusted R-squared is the better measure.
print(
    "Goodness of Model (R-Squared Score):",
    simple_reg.r_score(x_test=X_test, y_test=Y_test),
)
Example #7
0
# from HappyML.preprocessor import KBestSelector
# selector = KBestSelector(best_k=2)
# X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

# # Split Training / Testing Set
# X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# # Feature Scaling
# X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))

# In[]
import HappyML.preprocessor as pp

# Load Data (sklearn.datasets.load_wine() is an alternative source).
# NOTE(review): machine-specific absolute path.
dataset = pp.dataset(
    file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch09 Random Forests/Wine.csv"
)

# Decomposition: columns 0..12 are features, column 13 is the target.
X, Y = pp.decomposition(dataset, list(range(13)), [13])

# Feature Scaling, fitted on and applied to the same data.
X = pp.feature_scaling(X, X)

# PCA without HappyML's Class
# from sklearn.decomposition import PCA
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas as pd
Example #8
0
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 15 10:10:12 2019

@author: 俊男
"""

# In[] Pre-processing
from HappyML import preprocessor as pp

# Dataset Loading
dataset = pp.dataset(file="Salary_Data.csv")

# Independent/Dependent Variables Decomposition: column 0 -> X, column 1 -> Y
X, Y = pp.decomposition(dataset, x_columns=[0], y_columns=[1])

# Split Training vs. Testing Set, with train_size of two thirds
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X, y_ary=Y, train_size=2 / 3
)

# Feature Scaling (optional): fitted on the training part, applied to both
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))
Y_train, Y_test = pp.feature_scaling(Y_train, (Y_train, Y_test))

# In[] Fitting Simple Regressor
# from sklearn.linear_model import LinearRegression

# regressor = LinearRegression()
# regressor.fit(X_train, Y_train)
# Y_pred = regressor.predict(X_test)
Example #9
0
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 21 16:35:04 2021

@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.classification import NaiveBayesClassifier
from HappyML.performance import KFoldClassificationPerformance
from HappyML.criteria import AssumptionChecker
import HappyML.model_drawer as md

# Columns 0..7 are the features; the last column (-1) is the target.
dataset = pp.dataset(file="Diabetes.csv")
X, Y = pp.decomposition(dataset, list(range(8)), [-1])

# Feature selection with a default-configured KBestSelector.
selector = pp.KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(X)

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

# Feature scaling fitted on the training part, applied to both parts.
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))

# Fit on the training set, then predict the test set.
classifier = NaiveBayesClassifier()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold performance object built from the full X / Y and the wrapped model.
K = 10
Kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print(f"{K} Folds Mean Accuracy: {Kfp.accuracy()}")
print(f"{K} Folds Mean Recall: {Kfp.recall()}")
Example #10
0
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 12 23:38:01 2019

@author: 俊男
"""

# In[] Import & Load data
import HappyML.preprocessor as pp

dataset = pp.dataset("CarEvaluation.csv")

# In[] Decomposition
# Columns 0..3 are the features, column 4 is the target.
X, Y = pp.decomposition(dataset, list(range(4)), [4])

# In[] Missing Data
# strategy="mean" is passed through to HappyML's missing-data handling.
X = pp.missing_data(X, strategy="mean")

# In[] Categorical Data Encoding

# Label Encoding (also returns the label mapping)
Y, Y_mapping = pp.label_encoder(Y, True)

# One-Hot Encoding of feature column 0
X = pp.onehot_encoder(ary=X, columns=[0])

# In[] Split Training Set, Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(X,
                                                       Y,
Example #11
0
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 30 20:15:21 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# Load Dataset
# NOTE(review): machine-specific absolute path.
dataset = pp.dataset(file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch10 K-Means/Mall_Customers.csv")

# Decomposition: only features are extracted (columns 1..4)
X = pp.decomposition(dataset, x_columns=list(range(1, 5)))

# One-Hot Encoding of column 0 with remove_trap=True
X = pp.onehot_encoder(X, columns=[0], remove_trap=True)

# Feature Scaling (for PCA Feature Selection)
X = pp.feature_scaling(X, X)

# Feature Selection (PCA)
from HappyML.preprocessor import PCASelector

selector = PCASelector()
X = selector.fit(x_ary=X, verbose=True, plot=True).transform(X)

# In[]
from HappyML.clustering import KMeansCluster
# -*- coding: utf-8 -*-
"""
Created on Mon Aug  9 19:22:57 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# NOTE(review): machine-specific absolute path.
dataset = pp.dataset("C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/Social_Network_Ads.csv")

# Columns 1..3 are the features, column 4 is the target.
X, Y = pp.decomposition(dataset, list(range(1, 4)), [4])

# One-hot encode feature column 0 with remove_trap=True.
X = pp.onehot_encoder(ary=X, columns=[0], remove_trap=True)

# Feature Selection
from HappyML.preprocessor import KBestSelector
selector = KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(X)

# Split Training & Testing set
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# Feature scaling fitted on the training part, applied to both parts.
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))

# In[]
# from sklearn.linear_model import LogisticRegression
# import time

# classifier = LogisticRegression(solver="lbfgs", random_state=int(time.time()))
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 19 19:58:29 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# NOTE(review): machine-specific absolute path.
dataset = pp.dataset("C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch04 Preprocessing/CarEvaluation.csv")
print(dataset)

# In[]
# Columns 0..3 are the features, column 4 is the target.
X, Y = pp.decomposition(dataset, list(range(4)), [4])
print(X)
print(Y)

# In[]
X = pp.missing_data(X, "mean")
print(X)

# In[]
# First form: label_encoder without the mapping.
Y = pp.label_encoder(Y)
print(Y)

# Second form: label_encoder with mapping=True.
# NOTE(review): Y has already been encoded by the call above, so the mapping
# returned here presumably relates encoded values to themselves rather than
# to the original labels -- confirm whether that is intended.
Y, Y_mapping = pp.label_encoder(Y, True)
print(Y)
print(Y_mapping)

# In[]
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 26 21:20:09 2019

@author: 俊男
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load Dataset
dataset = pp.dataset("Mall_Customers.csv")

# Decomposition: only features are extracted (columns 1..4)
X = pp.decomposition(dataset, x_columns=list(range(1, 5)))

# One-Hot Encoding of column 0 with remove_trap=True
X = pp.onehot_encoder(X, columns=[0], remove_trap=True)

# Feature Scaling (for PCA Feature Selection)
X = pp.feature_scaling(X, X)

# Feature Selection (PCA)
from HappyML.preprocessor import PCASelector

selector = PCASelector()
X = selector.fit(x_ary=X, verbose=True, plot=True).transform(X)

# In[] K-Means Clustering with Fixed Clusters = 4 (Without HappyML)
# from sklearn.cluster import KMeans
# import time
Example #15
0
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 26 20:35:29 2021

@author: henry
"""

# In[]
import HappyML.preprocessor as pp

# NOTE(review): machine-specific absolute path.
dataset = pp.dataset("C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/Salary_Data.csv")

# Column 0 -> X, column 1 -> Y
X, Y = pp.decomposition(dataset, x_columns=[0], y_columns=[1])

# Split Training / Testing Set, with train_size of two thirds
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X, y_ary=Y, train_size=2 / 3
)

# Feature scaling fitted on the training part, applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)
Y_train, Y_test = pp.feature_scaling(
    fit_ary=Y_train, transform_arys=(Y_train, Y_test)
)

# In[]
from HappyML.regression import SimpleRegressor

# Fit on the training set, predict the test set, then report the score.
regressor = SimpleRegressor()
Y_pred = regressor.fit(x_train=X_train, y_train=Y_train).predict(x_test=X_test)

print("R-Squared Score:", regressor.r_score(x_test=X_test, y_test=Y_test))

# In[]
from HappyML import model_drawer as md

sample_data = X_train, Y_train
Example #16
0
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 20 21:53:40 2021

@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.regression import PolynomialRegressor
import pandas as pd
import HappyML.model_drawer as md
from HappyML.performance import rmse

# Column 0 is the independent variable, column 1 the dependent variable.
dataset = pp.dataset("Device_Failure.csv")
X, Y = pp.decomposition(dataset, x_columns=[0], y_columns=[1])

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X,
                                                       y_ary=Y,
                                                       train_size=0.75)

# best_degree() is given the train/test split; the regressor is then fitted
# on the complete data set.
reg_poly = PolynomialRegressor()
reg_poly.best_degree(x_train=X_train,
                     y_train=Y_train,
                     x_test=X_test,
                     y_test=Y_test)
Y_poly = reg_poly.fit(x_train=X, y_train=Y).predict(x_test=X)

# Ask for the years in service and predict from a one-cell DataFrame.
years = float(input("請輸入設備已使用年份:"))
hours_pred = reg_poly.predict(pd.DataFrame([[years]])).iloc[0, 0]
print("您的設備預測總失效時間 =", "{:.4f}".format(hours_pred), "小時")
# The per-year average is undefined for 0 years; the unguarded division used
# to raise ZeroDivisionError on that input.
if years != 0:
    print("平均每年失效時間 =", "{:.4f}".format(hours_pred / years), "小時/年")
else:
    print("平均每年失效時間 =", "N/A")
Example #17
0
# # By KBestSelector
# from HappyML.preprocessor import KBestSelector
# selector = KBestSelector(best_k=2)
# X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

# # Split Training / Testing Set
# X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# # Feature Scaling
# X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))

# In[] Preprocessing #3: With PCA, and Boundary Visualization
import HappyML.preprocessor as pp

# Load Data (sklearn.datasets.load_wine() is an alternative source)
dataset = pp.dataset("Wine.csv")

# Decomposition: columns 0..12 are features, column 13 is the target
X, Y = pp.decomposition(dataset, list(range(13)), [13])

# Feature Scaling, fitted on and applied to the same data
X = pp.feature_scaling(X, X)

# # PCA without HappyML's Class
# from sklearn.decomposition import PCA
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas as pd
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 16 21:53:25 2019

@author: 俊男
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load Dataset
dataset = pp.dataset("Social_Network_Ads.csv")

# X, Y Decomposition: columns 1..3 are features, column 4 is the target
X, Y = pp.decomposition(dataset, list(range(1, 4)), [4])

# Categorical Data Encoding & Remove Dummy Variable Trap
X = pp.onehot_encoder(ary=X, columns=[0], remove_trap=True)

# Feature Selection
from HappyML.preprocessor import KBestSelector
selector = KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(X)

# Split Training & Testing set
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# Feature Scaling: fitted on the training part, applied to both parts
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))
Example #19
0
# -*- coding: utf-8 -*-
"""
Created on Tue Sep  7 09:57:19 2021

@author: henry
"""

import HappyML.preprocessor as pp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
from HappyML.performance import ClassificationPerformance

dataset = pp.dataset(file="Churn_Modelling.csv")

# Columns 3..12 are the features, column 13 is the target.
X, Y = pp.decomposition(dataset, x_columns=list(range(3, 13)), y_columns=[13])

# One-hot encode feature columns 1 and 2.
X = pp.onehot_encoder(X, [1, 2], True)

# Feature selection with a default-configured KBestSelector.
selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(x_ary=X)

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# Feature scaling fitted on the training part, applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)

classifier = Sequential()

# Two sizes derived by repeated halving: first from the number of training
# feature columns plus one, then from the first size plus one.
arithmetic_mean = [int((X_train.shape[1] + 1) / 2)]
arithmetic_mean.append(int((arithmetic_mean[0] + 1) / 2))
Example #20
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 16 11:52:51 2019

@author: 俊男
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load Dataset
dataset = pp.dataset("Position_Salaries.csv")

# Decomposition of Variables: column 1 -> X, column 2 -> Y
X, Y = pp.decomposition(dataset, [1], [2])

# Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    X, Y, train_size=0.8
)

# Feature Scaling (disabled)
#X = pp.feature_scaling(fit_ary=X, transform_arys=(X))
#Y = pp.feature_scaling(fit_ary=Y, transform_arys=(Y))

# In[] Linear Regression as comparison
from HappyML.regression import SimpleRegressor
import HappyML.model_drawer as md

# Fitted and evaluated on the complete data set, not on the split above.
reg_simple = SimpleRegressor()
Y_simple = reg_simple.fit(X, Y).predict(X)
Example #21
0
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 30 00:22:54 2021

@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.classification import SVM
from HappyML.performance import KFoldClassificationPerformance
import numpy as np
from HappyML.performance import GridSearch

# SVM without GridSearch
# Columns 0..19 are the features, column 20 is the target.
dataset = pp.dataset(file="Voice.csv")
X, Y = pp.decomposition(dataset, x_columns=list(range(20)), y_columns=[20])
Y, Y_mapping = pp.label_encoder(Y, True)

# Feature selection with a default-configured KBestSelector.
selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(x_ary=X)

# Split, then scale with a scaler fitted on the training part only.
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)

# Fit on the training set, then predict the test set.
classifier = SVM()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold performance object built from the full X / Y and the wrapped model.
K = 10
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("----- SVM Classification -----")
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 15 12:21:45 2019

@author: 俊男
"""

# In[] Pre-processing
import HappyML.preprocessor as pp

# Dataset Loading
dataset = pp.dataset(file="50_Startups.csv")

# Independent/Dependent Variables Decomposition:
# columns 0..3 are features, column 4 is the dependent variable
X, Y = pp.decomposition(dataset, x_columns=list(range(4)), y_columns=[4])

# Apply One Hot Encoder to Column[3] & Remove Dummy Variable Trap by
# dropping a column in a separate step.  Single-call alternative:
#X = pp.onehot_encoder(X, columns=[3], remove_trap=True)
X = pp.onehot_encoder(ary=X, columns=[3])
X = pp.remove_columns(X, [3])

# Split Training vs. Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X, y_ary=Y, train_size=0.8
)

# Feature Scaling (optional)
#X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))
#Y_train, Y_test = pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test))

# In[] Create Linear Regressor
from HappyML.regression import SimpleRegressor
Example #23
0
# -*- coding: utf-8 -*-
"""
Created on Thu Sep  2 08:06:43 2021

@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.classification import RandomForest
from HappyML.performance import KFoldClassificationPerformance
from random import randint
import HappyML.model_drawer as md
from IPython.display import Image, display

dataset = pp.dataset(file="Zoo_Data.csv")
dataset_classname = pp.dataset(file="Zoo_Class_Name.csv")

# Collect the "Class_Type" value of every row, in row order.
class_names = [
    record["Class_Type"] for _, record in dataset_classname.iterrows()
]

# Columns 1..16 are the features (column 0 is left out); column 17 is the
# target.
X, Y = pp.decomposition(dataset, x_columns=list(range(1, 17)), y_columns=[17])

# Feature selection with KBestSelector (best_k="auto", sort=False).
selector = pp.KBestSelector(best_k="auto")
X = selector.fit(X, Y, sort=False).transform(x_ary=X)

# Split Training / Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# Fit on the training set, then predict the test set.
classifier = RandomForest()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold performance object built from the full X / Y and the wrapped model.
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier)