# Row index at which the data is cut: the first 70% of the rows (rounded
# down by int()) are used for training, the remaining rows for testing.
train_test_split = int(len(iris_data) * 0.7)

# Features: rows before the cut for training, rows from the cut on for testing.
X, X_test = X_data.iloc[:train_test_split, :], X_data.iloc[train_test_split:, :]
# Targets are cut at the same row index so they stay aligned with the features.
y, y_test = y_data.iloc[:train_test_split], y_data.iloc[train_test_split:]

# Training and Testing
# Fit one depth-3 tree per split criterion, plot it, and print its scores.
for criteria in ('information_gain', 'gini_index'):
    # Build Decision Tree
    tree = DecisionTree(max_depth=3, criterion=criteria)
    tree.fit(X, y)

    # Predict on the training rows and on the held-out test rows
    y_hat, y_test_hat = tree.predict(X), tree.predict(X_test)
    tree.plot()

    print('Criteria :', criteria)
    for split_label, predicted, actual in (
        ('Train Accuracy: ', y_hat, y),
        ('Test Accuracy: ', y_test_hat, y_test),
    ):
        print(split_label, accuracy(predicted, actual))

    # Precision and Recall for each class.
    # The class labels are taken from the training targets, while both
    # metrics are computed on the test predictions.
    for cls in y.unique():
        print("Class =", cls)
        for metric_label, metric_fn in (('Precision: ', precision), ('Recall: ', recall)):
            print(metric_label, metric_fn(y_test_hat, y_test, cls))


####################################################################################

# 5 fold cross-validation
acc = 0
for i in range(5):
Beispiel #2
0
# 70:30 train test split
# The first 70% of the rows (rounded down by int()) form the training set;
# the remaining rows form the test set.
train_test_split = int(data.shape[0] * 0.7)

# Every column except the last goes into X; the last column is the target y.
X, y = data.iloc[:train_test_split, :-1], data.iloc[:train_test_split, -1]
X_test, y_test = data.iloc[train_test_split:, :-1], data.iloc[train_test_split:, -1]


maxdepth = 4

# Building Decision Tree based on my model
criteria = 'information_gain'  # split based on information gain
mytree = DecisionTree(max_depth=maxdepth, criterion=criteria)
mytree.fit(X, y)
mytree.plot()

print("My Model")

# Score the fitted tree on the training rows first ...
y_hat = mytree.predict(X)
print("Train Scores:")
for score_label, score_fn in (('\tRMSE: ', rmse), ('\tMAE: ', mae)):
    print(score_label, score_fn(y_hat, y))

# ... then on the held-out test rows, using the same two error measures.
y_test_hat = mytree.predict(X_test)
print("Test Scores:")
for score_label, score_fn in (('\tRMSE: ', rmse), ('\tMAE: ', mae)):
    print(score_label, score_fn(y_test_hat, y_test))

###################################################################################

# Building Decision Tree based on sklearn