# In[] Pre-processing
import HappyML.preprocessor as pp

# Read the raw data file
dataset = pp.dataset("50_Startups.csv")

# Split the table into features (first four columns) and target (last column)
X, Y = pp.decomposition(dataset, x_columns=[0, 1, 2, 3], y_columns=[4])

# One-hot encode the categorical column [3], then drop one dummy column
# so the encoded columns are not linearly dependent (dummy-variable trap)
X = pp.onehot_encoder(X, columns=[3])
X = pp.remove_columns(X, [3])
#X = pp.onehot_encoder(X, columns=[3], remove_trap=True)

# 80/20 training vs. testing split
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8)

# Feature Scaling (optional)
#X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))
#Y_train, Y_test = pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test))

# In[] Fit the linear regressor and score it on the held-out set
from HappyML.regression import SimpleRegressor

regressor = SimpleRegressor()
Y_pred_simple = regressor.fit(X_train, Y_train).predict(X_test)

# R-Squared always increases as predictors are added in multiple linear
# regression, so Adjusted R-Squared would be the fairer measure here.
print("Goodness of Model (R-Squared Score):",
      regressor.r_score(X_test, Y_test))
# ---- Ejemplo n.º 2 (0 votes) ----
# @author: 俊男
# (stray docstring terminator from extraction, commented out:)
# """

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load Dataset
dataset = pp.dataset(file="Position_Salaries.csv")

# Decomposition of Variables: column [1] is the feature, column [2] the target
X, Y = pp.decomposition(dataset, x_columns=[1], y_columns=[2])

# Training / Testing Set (80/20 split)
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X,
                                                       y_ary=Y,
                                                       train_size=0.8)

# Feature Scaling (optional)
#X = pp.feature_scaling(fit_ary=X, transform_arys=(X))
#Y = pp.feature_scaling(fit_ary=Y, transform_arys=(Y))

# In[] Linear Regression as comparison
from HappyML.regression import SimpleRegressor
import HappyML.model_drawer as md

# NOTE(review): the regressor is deliberately fit and evaluated on the FULL
# dataset (X, Y), not on the train split -- consistent with using it only as
# a whole-curve baseline for comparison plots.
reg_simple = SimpleRegressor()
Y_simple = reg_simple.fit(x_train=X, y_train=Y).predict(x_test=X)

md.sample_model(sample_data=(X, Y), model_data=(X, Y_simple))
# FIX: the original print statement was truncated mid-call by extraction;
# completed with the r_score(x_test=..., y_test=...) signature used
# elsewhere in this file.
print("R-Squared of Simple Regression:", reg_simple.r_score(x_test=X,
                                                            y_test=Y))
# ---- Ejemplo n.º 3 (0 votes) ----
# X, Y
# NOTE(review): `dataset` is loaded above this fragment (header not visible).
# From the attributes used (.data, .feature_names, .target, .target_names)
# it looks like a scikit-learn Bunch for the Iris dataset -- confirm.
import pandas as pd
X = pd.DataFrame(dataset.data, columns=dataset.feature_names)
Y = pd.DataFrame(dataset.target, columns=["Iris_Type"])
Y_name = dataset.target_names.tolist()  # class index -> class-name strings

# Load HappyML
from HappyML.preprocessor import KBestSelector
import HappyML.preprocessor as pp

# Feature Selection: keep the 2 highest-scoring features
# (verbose=True presumably prints the ranking; sort=True orders the output)
selector = KBestSelector(best_k=2)
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

# Split Training / Testing Set (library-default split ratio)
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

# Feature Scaling: fit on the training set only, then transform both sets
X_train, X_test = pp.feature_scaling(fit_ary=X_train,
                                     transform_arys=(X_train, X_test))

# In[] Comparison: Naive Bayes
from HappyML.classification import NaiveBayesClassifier

clr_bayes = NaiveBayesClassifier()
Y_pred_bayes = clr_bayes.fit(X_train, Y_train).predict(X_test)

# Performance
from HappyML.performance import KFoldClassificationPerformance

K = 10  # number of folds for the K-fold evaluation (truncated below)
# ---- Ejemplo n.º 4 (0 votes) ----
# @author: henry
# (stray docstring terminator from extraction, commented out:)
# """

from HappyML import preprocessor as pp
from HappyML.regression import SimpleRegressor
import pandas as pd
from HappyML import model_drawer as md

# Height and weight statistics loaded from two separate files
dataset_h = pp.dataset("Student_Height.csv")
dataset_w = pp.dataset("Student_Weight.csv")

# Column [1] is the feature; columns [3, 4] are the two target columns
# (presumably one per gender -- confirm against the CSV headers)
X_h, Y_h = pp.decomposition(dataset_h, [1], [3, 4])
X_w, Y_w = pp.decomposition(dataset_w, [1], [3, 4])

X_h_train, X_h_test, Y_h_train, Y_h_test = pp.split_train_test(X_h, Y_h)
X_w_train, X_w_test, Y_w_train, Y_w_test = pp.split_train_test(X_w, Y_w)

# regressor[0][*]: height models, regressor[1][*]: weight models;
# the second index selects which of the two target columns is fitted
regressor = [[SimpleRegressor(), SimpleRegressor()],
             [SimpleRegressor(), SimpleRegressor()]]
regressor[0][0].fit(X_h_train, Y_h_train.iloc[:, 0].to_frame())
regressor[0][1].fit(X_h_train, Y_h_train.iloc[:, 1].to_frame())
regressor[1][0].fit(X_w_train, Y_w_train.iloc[:, 0].to_frame())
regressor[1][1].fit(X_w_train, Y_w_train.iloc[:, 1].to_frame())

print("台灣 6~15 歲學童身高、體重評估系統\n")
# FIX: replaced eval(input(...)) with int()/float() -- eval() executes
# arbitrary user-typed code and is a security hole; plain numeric parsing
# is all these prompts need. Numeric inputs behave identically.
gender = int(input("請輸入您的性別(1.男 2.女):")) - 1  # 0 = male, 1 = female
age = int(input("請輸入您的年齡(6-15):"))
height = float(input("請輸入您的身高(cm):"))
weight = float(input("請輸入您的體重(kg):"))
# NOTE(review): a new, unrelated script (Social_Network_Ads classification)
# starts here -- the preceding example was truncated by extraction.
# Load Dataset
dataset = pp.dataset(file="Social_Network_Ads.csv")

# X, Y Decomposition: columns [1, 2, 3] are the features, column [4] the label
X, Y = pp.decomposition(dataset, x_columns=[1, 2, 3], y_columns=[4])

# Categorical Data Encoding: one-hot encode column [0] and drop one dummy
# column (remove_trap=True) to avoid the dummy-variable trap
X = pp.onehot_encoder(X, columns=[0], remove_trap=True)

# Feature Selection (best_k left to the selector's default)
from HappyML.preprocessor import KBestSelector
selector = KBestSelector()
X = selector.fit(x_ary=X, y_ary=Y, verbose=True, sort=True).transform(x_ary=X)

# Split Training & Testing set (library-default split ratio)
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)

# Feature Scaling: fit on the training set only, then transform both sets
X_train, X_test = pp.feature_scaling(fit_ary=X_train,
                                     transform_arys=(X_train, X_test))

# In[] Logistic Regression (scikit-learn sketch, intentionally commented out)
#from sklearn.linear_model import LogisticRegression
#import time
#
## Model Creation
#classifier = LogisticRegression(solver="lbfgs", random_state=int(time.time()))
#
## Features Selection
#from sklearn.feature_selection import SelectKBest
#from sklearn.feature_selection import chi2
# ---- Ejemplo n.º 6 (0 votes) ----
# In[] Import & Load data
import HappyML.preprocessor as pp

dataset = pp.dataset(file="CarEvaluation.csv")

# In[] Decomposition: columns [0..3] are the features, column [4] the label
X, Y = pp.decomposition(dataset,
                        x_columns=[i for i in range(4)],
                        y_columns=[4])

# In[] Missing Data
# NOTE(review): strategy="mean" only makes sense for numeric columns; car
# evaluation features are typically categorical -- confirm this is intended.
X = pp.missing_data(X, strategy="mean")

# In[] Categorical Data Encoding

# Label Encoding: Y becomes integer codes; Y_mapping keeps the code->label map
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

# One-Hot Encoding of the first feature column
X = pp.onehot_encoder(X, columns=[0])

# In[] Split Training Set, Testing Set (fixed seed for reproducibility)
X_train, X_test, Y_train, Y_test = pp.split_train_test(X,
                                                       Y,
                                                       train_size=0.8,
                                                       random_state=0)

# In[] Feature Scaling for X_train, X_test: fit on train, transform both
X_train, X_test = pp.feature_scaling(X_train, transform_arys=(X_train, X_test))