import numpy as np
import pandas as pd

from tree.base import DecisionTree
from metrics import *
# AdaBoostClassifier comes from the local ensemble module (import path not shown here)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
print("-----------------------------------------------------------")
print("Decision stump on random data")
print("-----------------------------------------------------------")
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

criteria = 'information_gain'
tree = DecisionTree(criterion=criteria)

# Uniform initial sample weights, one per training example
n_samples = X.shape[0]
sample_weights = [1 / n_samples] * n_samples
tree.fit(X, y, sample_weights)
yhat = pd.Series(tree.predict(X))

print('Criteria :', criteria)
print('Accuracy: ', accuracy(yhat, y))
for cls in y.unique():
    print("***Class :" + str(cls) + "***")
    print('Precision: ', precision(yhat, y, cls))
    print('Recall: ', recall(yhat, y, cls))

print("-----------------------------------------------------------")
print("Adaboost on random data")
print("-----------------------------------------------------------")
Classifier_AB = AdaBoostClassifier(base_estimator=tree, n_estimators=n_estimators)
Classifier_AB.fit(X, y)
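# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the boosting loop a classifier like
# AdaBoostClassifier is assumed to implement (discrete AdaBoost, two classes).
# Everything below (class name, attributes, the 1e-10 guard) is illustrative,
# not the repository's actual implementation.
# ---------------------------------------------------------------------------
import copy

class AdaBoostSketch:
    def __init__(self, base_estimator, n_estimators=3):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.stumps, self.alphas = [], []

    def fit(self, X, y):
        self.classes_ = np.unique(y)                       # expects exactly two classes
        sign = np.where(y.values == self.classes_[0], -1.0, 1.0)
        w = np.ones(len(y)) / len(y)                       # uniform initial sample weights
        for _ in range(self.n_estimators):
            stump = copy.deepcopy(self.base_estimator)
            stump.fit(X, y, w)                             # weighted fit, same signature as above
            pred = np.where(np.asarray(stump.predict(X)) == self.classes_[0], -1.0, 1.0)
            err = w[pred != sign].sum()
            alpha = 0.5 * np.log((1 - err) / max(err, 1e-10))
            w *= np.exp(-alpha * sign * pred)              # up-weight misclassified points
            w /= w.sum()
            self.stumps.append(stump)
            self.alphas.append(alpha)
        return self

    def predict(self, X):
        # Sign of the alpha-weighted vote decides the class
        score = sum(a * np.where(np.asarray(s.predict(X)) == self.classes_[0], -1.0, 1.0)
                    for s, a in zip(self.stumps, self.alphas))
        return pd.Series(np.where(score < 0, self.classes_[0], self.classes_[1]))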
shuffled = estate.sample(frac=1).reset_index(drop=True)

# Preprocessing
X = shuffled.iloc[:, :-1].squeeze()
y = shuffled.iloc[:, -1]
len_estate = len(y)

# Splitting data (`split`, the train fraction, and `max_depth` are assumed
# to be defined earlier in this script)
split_idx = int(split * len_estate)
X_train, y_train = X.iloc[:split_idx], y.iloc[:split_idx]
X_test = X.iloc[split_idx:].reset_index(drop=True)
y_test = y.iloc[split_idx:].reset_index(drop=True)

# Learning tree
print("Training may take a while; reduce max_depth if it runs too long.")
tree = DecisionTree(criterion="information_gain", max_depth=max_depth)
tree.fit(X_train, y_train)
tree.plot()

# Printing accuracies for different depths
for depth in range(2, max_depth + 1):
    y_hat = tree.predict(X_test, max_depth=depth)
    print("Depth: ", depth)
    print('\tRMSE: ', rmse(y_hat, y_test))
    print('\tMAE: ', mae(y_hat, y_test))

# Decision tree regressor from scikit-learn, for comparison
dt = DecisionTreeRegressor(random_state=0)
dt.fit(X_train, y_train)
y_hat = pd.Series(dt.predict(X_test))
print('Sklearn RMSE: ', rmse(y_hat, y_test))
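# ---------------------------------------------------------------------------
# rmse and mae come from the local metrics module, which is not shown here.
# A minimal implementation consistent with how they are called on two pandas
# Series would look like this sketch (suffixed names, purely illustrative):
# ---------------------------------------------------------------------------
def rmse_sketch(y_hat, y):
    # Root-mean-squared error over position-aligned values
    assert y_hat.size == y.size
    return float(np.sqrt(((np.asarray(y_hat) - np.asarray(y)) ** 2).mean()))

def mae_sketch(y_hat, y):
    # Mean absolute error over the same values
    assert y_hat.size == y.size
    return float(np.abs(np.asarray(y_hat) - np.asarray(y)).mean())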
import numpy as np
import pandas as pd

from tree.base import DecisionTree
from metrics import *

np.random.seed(42)

# Test case 1
# Real Input and Real Output
N = 30
P = 5
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))

for criteria in ['information_gain', 'gini_index']:
    tree = DecisionTree(criterion=criteria)  # Split based on Inf. Gain
    tree.fit(X, y)
    y_hat = tree.predict(X)
    tree.plot()
    print('Criteria :', criteria)
    print('RMSE: ', rmse(y_hat, y))
    print('MAE: ', mae(y_hat, y))

# Test case 2
# Real Input and Discrete Output
N = 30
P = 5
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randint(P, size=N), dtype="category")

for criteria in ['information_gain', 'gini_index']:
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)
    tree.plot()
    print('Criteria :', criteria)
    print('Accuracy: ', accuracy(y_hat, y))
    for cls in y.unique():
        print('Precision: ', precision(y_hat, y, cls))
        print('Recall: ', recall(y_hat, y, cls))
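# ---------------------------------------------------------------------------
# The two criterion strings select the impurity measure used at each split.
# The DecisionTree internals are not shown; under the usual definitions the
# measures look like the sketch below (names suffixed to avoid clashing with
# whatever `metrics` exports; illustrative only):
# ---------------------------------------------------------------------------
def entropy_sketch(y):
    # H(y) = -sum_k p_k log2 p_k over the class proportions p_k
    p = y.value_counts(normalize=True).values
    p = p[p > 0]
    return float(-(p * np.log2(p)).sum())

def gini_index_sketch(y):
    # G(y) = 1 - sum_k p_k^2; lower means purer
    p = y.value_counts(normalize=True).values
    return float(1.0 - (p ** 2).sum())

def information_gain_sketch(y, splits):
    # Parent entropy minus the size-weighted entropy of the child splits
    n = len(y)
    child = sum(len(s) / n * entropy_sketch(s) for s in splits)
    return entropy_sketch(y) - child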
# Collapse the three iris labels into virginica vs not-virginica
for i in range(len(y)):
    if y[i] != 'Iris-virginica':
        y[i] = 'not virginica'

N = len(y)
t = int(np.floor(0.6 * N))
X_train = X.iloc[:t, :]
y_train = y[:t]
X_test = X.iloc[t:, :]
y_test = pd.Series(list(y[t:]))

criteria = 'information_gain'
tree = DecisionTree(criterion=criteria, max_depth=1)
Classifier_AB = AdaBoostClassifier(base_estimator=tree, n_estimators=n_estimators)
Classifier_AB.fit(X_train, y_train)
y_hat = Classifier_AB.predict(X_test)
# [fig1, fig2] = Classifier_AB.plot()
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y_test))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y_test, cls))
    print('Recall: ', recall(y_hat, y_test, cls))

print("\nDECISION STUMP")
tree.fit(X_train, y_train, np.ones(t) / t)  # weights must match the t training rows, not all N
y_hat = tree.predict(X_test)
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y_test))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y_test, cls))
    print('Recall: ', recall(y_hat, y_test, cls))
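# ---------------------------------------------------------------------------
# accuracy, precision and recall also come from the unshown metrics module.
# A sketch consistent with the call signatures above (predictions, ground
# truth, and a class label for the last two); illustrative only:
# ---------------------------------------------------------------------------
def accuracy_sketch(y_hat, y):
    # Fraction of positions where prediction equals ground truth
    assert y_hat.size == y.size
    return float((np.asarray(y_hat) == np.asarray(y)).mean())

def precision_sketch(y_hat, y, cls):
    # TP / (TP + FP): of everything predicted as `cls`, how much really is
    predicted_cls = np.asarray(y_hat) == cls
    return float((np.asarray(y)[predicted_cls] == cls).mean()) if predicted_cls.any() else 0.0

def recall_sketch(y_hat, y, cls):
    # TP / (TP + FN): of everything that really is `cls`, how much we caught
    actual_cls = np.asarray(y) == cls
    return float((np.asarray(y_hat)[actual_cls] == cls).mean()) if actual_cls.any() else 0.0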
# 70:30 train-test split
split_idx = int(0.7 * data.shape[0])
X = data.iloc[:split_idx, :-1]
X_test = data.iloc[split_idx:, :-1]
y = data.iloc[:split_idx, -1]
y_test = data.iloc[split_idx:, -1]

maxdepth = 4

# Building a decision tree with my model
criteria = 'information_gain'
mytree = DecisionTree(criterion=criteria, max_depth=maxdepth)  # Split based on Inf. Gain
mytree.fit(X, y)
mytree.plot()
print("My Model")

y_hat = mytree.predict(X)
print("Train Scores:")
print('\tRMSE: ', rmse(y_hat, y))
print('\tMAE: ', mae(y_hat, y))

y_test_hat = mytree.predict(X_test)
print("Test Scores:")
print('\tRMSE: ', rmse(y_test_hat, y_test))
print('\tMAE: ', mae(y_test_hat, y_test))
###################################################################################
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tree.base import DecisionTree
from metrics import *
from sklearn.tree import DecisionTreeRegressor

np.random.seed(42)

# Read real-estate data set
# ...
# estate = pd.read_csv('Real_estate.csv', index_col='No', dtype=float)

estate = estate.sample(frac=1).reset_index(drop=True)  # shuffle rows
split_at = int(0.3 * estate.shape[0])
X_train = estate.iloc[:split_at, :-1]
y_train = estate.iloc[:split_at, -1]
X_test = estate.iloc[split_at:, :-1]
y_test = estate.iloc[split_at:, -1]

model = DecisionTree(max_depth=2)
model.fit(X_train, y_train)
y_out = model.predict(X_test)
print("RMSE is: ", rmse(y_out, y_test))
print("MAE is: ", mae(y_out, y_test))

# Same depth-2 regressor from scikit-learn, for comparison
model2 = DecisionTreeRegressor(max_depth=2)
model2.fit(X_train, y_train)
y_out = pd.Series(model2.predict(X_test), index=y_test.index)  # align indices for the metrics
print("RMSE of Sklearn is: ", rmse(y_out, y_test))
print("MAE of Sklearn is: ", mae(y_out, y_test))
np.random.seed(42)

# Read IRIS data set
# ...
# dataset = load_iris()
X, y = dataset.data, dataset.target

# Shuffle so the held-out rows are not all one class (iris is sorted by label)
from sklearn.utils import shuffle
X, y = shuffle(X, y, random_state=0)

print("Fit model for iris dataset with an 80:20 split (120 train / 30 test)")
clf = DecisionTree(criterion="information_gain", max_depth=5)
clf.fit(pd.DataFrame(X[0:120]), pd.Series(y[0:120], dtype="category"))
y = y[120:]
y_hat = pd.Series(clf.predict(pd.DataFrame(X[120:])))
print("Accuracy", accuracy(y_hat, pd.Series(y)))
y = pd.Series(y)
for cls in y.unique():
    print('Precision: for class ', cls, " : ", precision(y_hat, y, cls))
    print('Recall: for class ', cls, " : ", recall(y_hat, y, cls))


def cross_validtion_5_fold(X, y, depth):
    X_original = X
    y_original = y
    clf = DecisionTree(criterion="information_gain", max_depth=depth)