import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from metrics import *
from pprint import pprint

# Seed NumPy's global random generator.
np.random.seed(42)

# Iris classification example: load the data set, build a decision tree from
# the training split, then print accuracy and per-class precision/recall
# computed on the test split.
iris_columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'label',
]

tree = DecisionTree(criterion='information_gain', max_depth=10)  # split on information gain
tree.output = "category"
tree.input = "real"

df = pd.read_csv("iris.data", names=iris_columns)
train_data, test_data = tree.train_test_split(df)

# Build the tree from the training rows and attach it to the model object.
tree.tree = tree.decision_tree_algorithm(train_data)

# The last column holds the label; every column before it is a feature.
rows, colums = test_data.shape
test_features = test_data.iloc[:, :colums - 1]
y_hat = tree.predict(test_features)
y = test_data.iloc[:, -1]

print('Accuracy: ', accuracy(y_hat, y))

# One report per distinct label present in the test split.
per_class_metrics = (('Precision: ', precision), ('Recall: ', recall))
for label in y.unique():
    print('Class Name: ', label)
    for caption, metric in per_class_metrics:
        print(caption, metric(y_hat, y, label))
    print()

# Example no. 2
# 0
from metrics import *

from sklearn import tree as sktree
from sklearn import metrics
from sklearn.model_selection import train_test_split
import numpy as np
from pprint import pprint

# Seed NumPy's global random generator.
np.random.seed(42)

# Real-estate valuation example: load the spreadsheet, build a regression
# tree from the training split, and print the RMSE measured on the test split.
tree = DecisionTree(criterion='information_gain',
                    max_depth=10)  # Split based on information gain
# NOTE(review): both modes are "discrete" even though this script calls the
# regression algorithm -- confirm against DecisionTree that this is intended.
tree.output = "discrete"
tree.input = "discrete"
df = pd.read_excel("Real estate valuation data set.xlsx",
                   names=[
                       'No', 'tran_date', 'age', 'distance_mrt', 'stores',
                       'lat', 'long', 'price'
                   ])
# Drop the 'No' column so it is not used as a feature
# (presumably a row counter -- verify against the spreadsheet).
df = df.drop('No', axis=1)
train_data, test_data = tree.train_test_split(df)
# Fit on the training split only. Fitting on the full `df` would let the tree
# see the test rows and make the RMSE below optimistically biased.
sub_tree = tree.regression_tree_algorithm(train_data)
print(sub_tree)
tree.tree = sub_tree
# The last column ('price') is the target; all columns before it are features.
rows, colums = test_data.values.shape
y_hat = tree.predict(test_data.iloc[:, 0:colums - 1])
y = test_data.iloc[:, -1]
print('RMSE: ', rmse(y_hat, y))