# -*- coding: utf-8 -*-
"""
Customer segmentation on CreditCards.csv: PCASelector(best_k=2) + KMeansCluster.

Created on Tue Sep 7 09:06:38 2021
@author: henry
"""
import HappyML.preprocessor as pp
from HappyML.clustering import KMeansCluster
import HappyML.model_drawer as md
from HappyML.classification import DecisionTree
from HappyML.performance import KFoldClassificationPerformance
from IPython.display import Image, display

# Load the raw table and run it through the missing-data step.
dataset = pp.missing_data(pp.dataset("CreditCards.csv"))

# Use columns 1..17 as features (column 0 is left out), then scale them.
X = pp.decomposition(dataset, list(range(1, 18)))
X = pp.feature_scaling(X, X)

# Reduce the scaled features with the PCA selector (best_k=2).
selector = pp.PCASelector(best_k=2)
X = selector.fit(X).transform(X)

# Cluster the reduced data; the predicted column is titled "Customer Type".
cluster = KMeansCluster()
Y_pred = cluster.fit(X).predict(X, "Customer Type")

md.cluster_drawer(
    X,
    Y_pred,
    cluster.centroids,
    "Customers Segmentation",
    "Microsoft JhengHei",
)
# -*- coding: utf-8 -*-
"""
Decision-tree classification on Mushrooms.csv.

Created on Mon Aug 23 20:11:45 2021
@author: henry
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

dataset = pp.dataset(file="Mushrooms.csv")

# Column 0 becomes Y; columns 1..22 become X.
X, Y = pp.decomposition(dataset, x_columns=list(range(1, 23)), y_columns=[0])

# One-hot encode all 22 feature columns and label-encode Y (keeping the mapping).
X = pp.onehot_encoder(X, columns=list(range(22)), remove_trap=True)
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

# Feature selection; best_k="auto" leaves the number of kept features to the selector.
from HappyML.preprocessor import KBestSelector

selector = KBestSelector(best_k="auto")
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# In[] Decision tree
from HappyML.classification import DecisionTree

classifier = DecisionTree()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)
# -*- coding: utf-8 -*-
"""
Decision-tree classification on HR-Employee-Attrition.csv with K-fold scoring.

Created on Sun Aug 29 05:49:05 2021
@author: henry
"""
import HappyML.preprocessor as pp
from HappyML.classification import DecisionTree
from HappyML.performance import KFoldClassificationPerformance
import HappyML.model_drawer as md
from IPython.display import Image, display

dataset = pp.dataset("HR-Employee-Attrition.csv")

# Column 1 is Y; every other column in 0..34 is a feature.
feature_columns = [0, *range(2, 35)]
X, Y = pp.decomposition(dataset, feature_columns, [1])

# Positions (within X) of the columns that get one-hot encoded.
onehot_columns = [1, 3, 6, 10, 14, 16, 20, 21]
X = pp.onehot_encoder(X, onehot_columns, True)
Y, Y_mapping = pp.label_encoder(Y, True)

selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

classifier = DecisionTree()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold evaluation is set up on the full X/Y with the underlying classifier.
K = 10
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("----- Decision Tree Classification -----")
# -*- coding: utf-8 -*-
"""
Simple regression on Position_Salaries.csv, plotted against the samples.

Created on Mon Aug 2 20:53:59 2021
@author: henry
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact directory layout exists.
dataset = pp.dataset(
    "C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/Position_Salaries.csv"
)

X, Y = pp.decomposition(dataset, x_columns=[1], y_columns=[2])
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X,
    y_ary=Y,
    train_size=0.8,
)

# In[] Simple regression
from HappyML.regression import SimpleRegressor
import HappyML.model_drawer as md

# The model is fitted and evaluated on the full X/Y, not on the split above.
reg_simple = SimpleRegressor()
Y_simple = reg_simple.fit(x_train=X, y_train=Y).predict(X)

md.sample_model(sample_data=(X, Y), model_data=(X, Y_simple))
print(
    "R-Squared of Simple Regression:",
    reg_simple.r_score(x_test=X, y_test=Y),
)
# -*- coding: utf-8 -*-
"""
Height/weight estimation from Student_Height.csv and Student_Weight.csv.

Four SimpleRegressor models are trained: regressor[0][*] on the height data
and regressor[1][*] on the weight data, one per Y column (columns 3 and 4 of
each file). The user is then asked for a gender choice and an age.

Created on Sun Aug 1 11:40:29 2021
@author: henry
"""
import ast

from HappyML import preprocessor as pp
from HappyML.regression import SimpleRegressor
import pandas as pd
from HappyML import model_drawer as md

dataset_h = pp.dataset("Student_Height.csv")
dataset_w = pp.dataset("Student_Weight.csv")

# Column 1 is the single feature; columns 3 and 4 are the two targets.
X_h, Y_h = pp.decomposition(dataset_h, [1], [3, 4])
X_w, Y_w = pp.decomposition(dataset_w, [1], [3, 4])

X_h_train, X_h_test, Y_h_train, Y_h_test = pp.split_train_test(X_h, Y_h)
X_w_train, X_w_test, Y_w_train, Y_w_test = pp.split_train_test(X_w, Y_w)

# regressor[measure][target]: measure 0 = height file, 1 = weight file;
# target 0/1 = first/second Y column.
# presumably the two Y columns correspond to the two gender choices below,
# since `gender` ends up as 0 or 1 - verify against the CSV headers.
regressor = [
    [SimpleRegressor(), SimpleRegressor()],
    [SimpleRegressor(), SimpleRegressor()],
]
for _measure, (_x_part, _y_part) in enumerate(
    [(X_h_train, Y_h_train), (X_w_train, Y_w_train)]
):
    for _target in range(2):
        regressor[_measure][_target].fit(
            _x_part, _y_part.iloc[:, _target].to_frame()
        )

print("台灣 6~15 歲學童身高、體重評估系統\n")

# ast.literal_eval only accepts Python literals, so typed-in text can no
# longer execute arbitrary code the way eval() allowed. Numeric input such as
# "1" or "6.5" still yields the same int/float values as before.
gender = ast.literal_eval(input("請輸入您的性別(1.男 2.女):").strip()) - 1
age = ast.literal_eval(input("請輸入您的年齡(6-15):").strip())
# -*- coding: utf-8 -*-
"""
Regression on 50_Startups.csv with scaled features and target.

Created on Mon Aug 2 18:25:36 2021
@author: henry
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# NOTE(review): absolute, machine-specific path.
dataset = pp.dataset(
    "C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/50_Startups.csv"
)

# Columns 0..3 are the features, column 4 is Y.
X, Y = pp.decomposition(dataset, list(range(4)), [4])

# Two-step alternative kept for reference (encode, then drop a column):
# X = pp.onehot_encoder(X, columns=[3])
# X = pp.remove_columns(X, [3])
X = pp.onehot_encoder(X, columns=[3], remove_trap=True)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8)

# Each scaler is fitted on the training part only and applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)
Y_train, Y_test = pp.feature_scaling(
    fit_ary=Y_train, transform_arys=(Y_train, Y_test)
)

# In[] Regression
from HappyML.regression import SimpleRegressor

simple_reg = SimpleRegressor()
Y_pred_simple = simple_reg.fit(X_train, Y_train).predict(X_test)

# R-Squared always increases in multiple linear regression,
# so Adjusted R-Squared is the better measure.
print(
    "Goodness of Model (R-Squared Score):",
    simple_reg.r_score(X_test, Y_test),
)
# Disabled alternative (KBestSelector-based preprocessing), kept for reference:
# from HappyML.preprocessor import KBestSelector
# selector = KBestSelector(best_k=2)
# X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)
# # Split Training / Testing Set
# X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)
# # Feature Scaling
# X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))

# In[] Preprocessing for PCA
import HappyML.preprocessor as pp

# Load the data (it can also be loaded with sklearn.datasets.load_wine()).
# NOTE(review): absolute, machine-specific path.
dataset = pp.dataset(
    "C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch09 Random Forests/Wine.csv"
)

# Columns 0..12 are the features, column 13 is Y.
X, Y = pp.decomposition(dataset, x_columns=list(range(13)), y_columns=[13])

# Scale the whole feature table (fitted on and applied to X itself).
X = pp.feature_scaling(fit_ary=X, transform_arys=X)

# PCA without HappyML's class (disabled):
# from sklearn.decomposition import PCA
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas as pd
# -*- coding: utf-8 -*-
"""
Preprocessing for simple regression on Salary_Data.csv.

Created on Mon Jul 15 10:10:12 2019
@author: 俊男
"""

# In[] Pre-processing
from HappyML import preprocessor as pp

# Load the dataset.
dataset = pp.dataset("Salary_Data.csv")

# Column 0 is the independent variable, column 1 the dependent one.
X, Y = pp.decomposition(dataset, [0], [1])

# Two thirds of the rows go to the training set.
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    X, Y, train_size=2 / 3
)

# Optional scaling: fitted on the training part, applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)
Y_train, Y_test = pp.feature_scaling(
    fit_ary=Y_train, transform_arys=(Y_train, Y_test)
)

# In[] Fitting Simple Regressor
# Plain scikit-learn version (disabled):
# from sklearn.linear_model import LinearRegression
# regressor = LinearRegression()
# regressor.fit(X_train, Y_train)
# Y_pred = regressor.predict(X_test)
# -*- coding: utf-8 -*-
"""
Naive Bayes classification on Diabetes.csv with K-fold accuracy and recall.

Created on Sat Aug 21 16:35:04 2021
@author: henry
"""
import HappyML.preprocessor as pp
from HappyML.classification import NaiveBayesClassifier
from HappyML.performance import KFoldClassificationPerformance
from HappyML.criteria import AssumptionChecker
import HappyML.model_drawer as md

dataset = pp.dataset("Diabetes.csv")

# The first eight columns are the features; the last column is Y.
X, Y = pp.decomposition(dataset, x_columns=list(range(8)), y_columns=[-1])

selector = pp.KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# The scaler is fitted on the training part only and applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)

classifier = NaiveBayesClassifier()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold evaluation runs on the full X/Y with the underlying classifier.
K = 10
Kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("{} Folds Mean Accuracy: {}".format(K, Kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, Kfp.recall()))
# -*- coding: utf-8 -*- """ Created on Sat Oct 12 23:38:01 2019 @author: 俊男 """ # In[] Import & Load data import HappyML.preprocessor as pp dataset = pp.dataset(file="CarEvaluation.csv") # In[] Decomposition X, Y = pp.decomposition(dataset, x_columns=[i for i in range(4)], y_columns=[4]) # In[] Missing Data X = pp.missing_data(X, strategy="mean") # In[] Categorical Data Encoding # Label Encoding Y, Y_mapping = pp.label_encoder(Y, mapping=True) # One-Hot Encoding X = pp.onehot_encoder(X, columns=[0]) # In[] Split Training Set, Testing Set X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y,
# -*- coding: utf-8 -*-
"""
Preprocessing and PCA feature selection on Mall_Customers.csv for clustering.

Created on Mon Aug 30 20:15:21 2021
@author: henry
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load the dataset.
# NOTE(review): absolute, machine-specific path.
dataset = pp.dataset(
    "C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch10 K-Means/Mall_Customers.csv"
)

# Only features are extracted (columns 1..4); no Y is requested.
X = pp.decomposition(dataset, x_columns=list(range(1, 5)))

# One-hot encode the first extracted column.
X = pp.onehot_encoder(ary=X, columns=[0], remove_trap=True)

# Scale before the PCA feature selection.
X = pp.feature_scaling(fit_ary=X, transform_arys=X)

# Feature selection with PCA.
from HappyML.preprocessor import PCASelector

selector = PCASelector()
X = selector.fit(x_ary=X, verbose=True, plot=True).transform(x_ary=X)

# In[] Clustering
from HappyML.clustering import KMeansCluster
# -*- coding: utf-8 -*-
"""
Preprocessing of Social_Network_Ads.csv for classification.

Created on Mon Aug 9 19:22:57 2021
@author: henry
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# NOTE(review): absolute, machine-specific path.
dataset = pp.dataset(
    file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/Social_Network_Ads.csv"
)

# Columns 1..3 are the features, column 4 is Y.
X, Y = pp.decomposition(dataset, x_columns=list(range(1, 4)), y_columns=[4])

# One-hot encode the first extracted column.
X = pp.onehot_encoder(X, columns=[0], remove_trap=True)

from HappyML.preprocessor import KBestSelector

selector = KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

# The scaler is fitted on the training part only and applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)

# In[] Logistic regression
# Plain scikit-learn version (disabled):
# from sklearn.linear_model import LogisticRegression
# import time
# classifier = LogisticRegression(solver="lbfgs", random_state=int(time.time()))
# -*- coding: utf-8 -*-
"""
Step-by-step preprocessing demo on CarEvaluation.csv, printing each result.

Created on Mon Jul 19 19:58:29 2021
@author: henry
"""

# In[] Load
import HappyML.preprocessor as pp

# NOTE(review): absolute, machine-specific path.
dataset = pp.dataset(
    file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch04 Preprocessing/CarEvaluation.csv"
)
print(dataset)

# In[] Decomposition: columns 0..3 are the features, column 4 is Y.
X, Y = pp.decomposition(dataset, x_columns=list(range(4)), y_columns=[4])
print(X)
print(Y)

# In[] Missing data
X = pp.missing_data(X, strategy="mean")
print(X)

# In[] Label encoding
# First form: encoded values only. The result is stored under its own name so
# the original Y is still available for the second call. Previously Y was
# overwritten here, so the mapping=True call below encoded already-encoded
# values and (presumably) returned a mapping of codes to codes instead of the
# original labels - confirm against pp.label_encoder.
Y_encoded = pp.label_encoder(Y)
print(Y_encoded)

# Second form: encoded values plus the mapping, built from the original Y.
Y, Y_mapping = pp.label_encoder(Y, mapping=True)
print(Y)
print(Y_mapping)

# In[]
# -*- coding: utf-8 -*-
"""
Preprocessing and PCA feature selection on Mall_Customers.csv for clustering.

Created on Mon Aug 26 21:20:09 2019
@author: 俊男
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load the dataset.
dataset = pp.dataset(file="Mall_Customers.csv")

# Only features are extracted (columns 1..4); no Y is requested.
X = pp.decomposition(dataset, x_columns=list(range(1, 5)))

# One-hot encode the first extracted column.
X = pp.onehot_encoder(ary=X, columns=[0], remove_trap=True)

# Scale before the PCA feature selection.
X = pp.feature_scaling(fit_ary=X, transform_arys=X)

# Feature selection with PCA.
from HappyML.preprocessor import PCASelector

selector = PCASelector()
X = selector.fit(x_ary=X, verbose=True, plot=True).transform(x_ary=X)

# In[] K-Means Clustering with Fixed Clusters = 4 (Without HappyML)
# Plain scikit-learn version (disabled):
# from sklearn.cluster import KMeans
# import time
# -*- coding: utf-8 -*-
"""
Simple regression on Salary_Data.csv with scaled feature and target.

Created on Mon Jul 26 20:35:29 2021
@author: henry
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# NOTE(review): absolute, machine-specific path.
dataset = pp.dataset(
    file="C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/Salary_Data.csv"
)

# Column 0 is the feature, column 1 is Y.
X, Y = pp.decomposition(dataset, [0], [1])

# Two thirds of the rows go to the training set.
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    X, Y, train_size=2 / 3
)

# Each scaler is fitted on the training part and applied to both parts.
X_train, X_test = pp.feature_scaling(
    X_train, transform_arys=(X_train, X_test)
)
Y_train, Y_test = pp.feature_scaling(
    Y_train, transform_arys=(Y_train, Y_test)
)

# In[] Regression
from HappyML.regression import SimpleRegressor

regressor = SimpleRegressor()
Y_pred = regressor.fit(X_train, Y_train).predict(X_test)
print("R-Squared Score:", regressor.r_score(X_test, Y_test))

# In[] Visualization
from HappyML import model_drawer as md

sample_data = (X_train, Y_train)
# -*- coding: utf-8 -*-
"""
Polynomial regression on Device_Failure.csv with an interactive prediction.

The user enters the number of years a device has been in use; the script
prints the predicted total failure hours and, for a positive number of years,
the average failure hours per year.

Created on Fri Aug 20 21:53:40 2021
@author: henry
"""
import HappyML.preprocessor as pp
from HappyML.regression import PolynomialRegressor
import pandas as pd
import HappyML.model_drawer as md
from HappyML.performance import rmse

dataset = pp.dataset("Device_Failure.csv")

# Column 0 is the feature, column 1 is Y.
X, Y = pp.decomposition(dataset, x_columns=[0], y_columns=[1])
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X, y_ary=Y, train_size=0.75
)

# The degree search uses the train/test split; the final fit uses all of X/Y.
reg_poly = PolynomialRegressor()
reg_poly.best_degree(
    x_train=X_train, y_train=Y_train, x_test=X_test, y_test=Y_test
)
Y_poly = reg_poly.fit(x_train=X, y_train=Y).predict(x_test=X)

years = float(input("請輸入設備已使用年份:"))
hours_pred = reg_poly.predict(pd.DataFrame([[years]])).iloc[0, 0]
print("您的設備預測總失效時間 =", "{:.4f}".format(hours_pred), "小時")

# A per-year average is undefined for 0 years (division by zero) and has no
# meaning for a negative input, so it is reported only for positive values.
if years > 0:
    print(
        "平均每年失效時間 =",
        "{:.4f}".format(hours_pred / years),
        "小時/年",
    )
# Disabled alternative (KBestSelector-based preprocessing), kept for reference:
# # By KBestSelector
# from HappyML.preprocessor import KBestSelector
# selector = KBestSelector(best_k=2)
# X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)
# # Split Training / Testing Set
# X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)
# # Feature Scaling
# X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))

# In[] Preprocessing #3: With PCA, and Boundary Visualization
import HappyML.preprocessor as pp

# Load the data (it can also be loaded with sklearn.datasets.load_wine()).
dataset = pp.dataset(file="Wine.csv")

# Columns 0..12 are the features, column 13 is Y.
X, Y = pp.decomposition(dataset, x_columns=list(range(13)), y_columns=[13])

# Scale the whole feature table (fitted on and applied to X itself).
X = pp.feature_scaling(fit_ary=X, transform_arys=X)

# PCA without HappyML's class (disabled):
# from sklearn.decomposition import PCA
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas as pd
# -*- coding: utf-8 -*-
"""
Preprocessing of Social_Network_Ads.csv for classification.

Created on Tue Jul 16 21:53:25 2019
@author: 俊男
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load the dataset.
dataset = pp.dataset(file="Social_Network_Ads.csv")

# Columns 1..3 are the features, column 4 is Y.
X, Y = pp.decomposition(dataset, x_columns=list(range(1, 4)), y_columns=[4])

# Encode the categorical column and drop one dummy (dummy-variable trap).
X = pp.onehot_encoder(X, columns=[0], remove_trap=True)

# Feature selection.
from HappyML.preprocessor import KBestSelector

selector = KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

# Training / testing split.
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

# The scaler is fitted on the training part only and applied to both parts.
X_train, X_test = pp.feature_scaling(
    fit_ary=X_train, transform_arys=(X_train, X_test)
)
# -*- coding: utf-8 -*-
"""
Keras Sequential classifier setup for Churn_Modelling.csv.

Created on Tue Sep 7 09:57:19 2021
@author: henry
"""
import HappyML.preprocessor as pp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
from HappyML.performance import ClassificationPerformance

dataset = pp.dataset("Churn_Modelling.csv")

# Columns 3..12 are the features, column 13 is Y.
X, Y = pp.decomposition(dataset, list(range(3, 13)), [13])
X = pp.onehot_encoder(X, [1, 2], True)

selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))

classifier = Sequential()

# Two sizes derived from the feature count: the first is half of
# (number of columns + 1), the second half of (first + 1), both rounded down.
arithmetic_mean = [(X_train.shape[1] + 1) // 2]
arithmetic_mean.append((arithmetic_mean[0] + 1) // 2)
# -*- coding: utf-8 -*-
"""
Position_Salaries.csv: preprocessing and a simple regression as a baseline.

Created on Tue Jul 16 11:52:51 2019
@author: 俊男
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load the dataset.
dataset = pp.dataset(file="Position_Salaries.csv")

# Column 1 is the feature, column 2 is Y.
X, Y = pp.decomposition(dataset, x_columns=[1], y_columns=[2])

# Training / testing split (80% training).
X_train, X_test, Y_train, Y_test = pp.split_train_test(
    x_ary=X,
    y_ary=Y,
    train_size=0.8,
)

# Feature scaling (disabled):
# X = pp.feature_scaling(fit_ary=X, transform_arys=(X))
# Y = pp.feature_scaling(fit_ary=Y, transform_arys=(Y))

# In[] Linear Regression as comparison
from HappyML.regression import SimpleRegressor
import HappyML.model_drawer as md

# The baseline is fitted on and predicted for the full X/Y, not the split.
reg_simple = SimpleRegressor()
Y_simple = reg_simple.fit(x_train=X, y_train=Y).predict(x_test=X)
# -*- coding: utf-8 -*-
"""
SVM classification on Voice.csv with K-fold scoring (no grid search yet).

Created on Mon Aug 30 00:22:54 2021
@author: henry
"""
import HappyML.preprocessor as pp
from HappyML.classification import SVM
from HappyML.performance import KFoldClassificationPerformance
import numpy as np
from HappyML.performance import GridSearch

# SVM without GridSearch
dataset = pp.dataset("Voice.csv")

# Columns 0..19 are the features, column 20 is Y.
X, Y = pp.decomposition(dataset, list(range(20)), [20])
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))

classifier = SVM()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold evaluation is set up on the full X/Y with the underlying classifier.
K = 10
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("----- SVM Classification -----")
# -*- coding: utf-8 -*-
"""
Preprocessing of 50_Startups.csv for linear regression.

Created on Mon Jul 15 12:21:45 2019
@author: 俊男
"""

# In[] Pre-processing
import HappyML.preprocessor as pp

# Load the dataset.
dataset = pp.dataset("50_Startups.csv")

# Columns 0..3 are the independent variables, column 4 the dependent one.
X, Y = pp.decomposition(dataset, list(range(4)), [4])

# One-hot encode column 3, then remove column 3 of the encoded table
# to avoid the dummy-variable trap.
X = pp.onehot_encoder(X, columns=[3])
X = pp.remove_columns(X, [3])
# One-call alternative (disabled):
# X = pp.onehot_encoder(X, columns=[3], remove_trap=True)

# Training / testing split (80% training).
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8)

# Optional feature scaling (disabled):
# X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))
# Y_train, Y_test = pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test))

# In[] Create Linear Regressor
from HappyML.regression import SimpleRegressor
# -*- coding: utf-8 -*-
"""
Random-forest classification on Zoo_Data.csv with K-fold scoring.

Created on Thu Sep 2 08:06:43 2021
@author: henry
"""
import HappyML.preprocessor as pp
from HappyML.classification import RandomForest
from HappyML.performance import KFoldClassificationPerformance
from random import randint
import HappyML.model_drawer as md
from IPython.display import Image, display

dataset = pp.dataset("Zoo_Data.csv")
dataset_classname = pp.dataset("Zoo_Class_Name.csv")

# Collect the "Class_Type" value of every row, in row order.
class_names = [
    record["Class_Type"] for _, record in dataset_classname.iterrows()
]

# Columns 1..16 are the features (column 0 is left out); column 17 is Y.
X, Y = pp.decomposition(dataset, list(range(1, 17)), [17])

selector = pp.KBestSelector(best_k="auto")
X = selector.fit(X, Y, sort=False).transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

classifier = RandomForest()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold evaluation is set up on the full X/Y with the underlying classifier.
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier)