Python DecisionTree.predict 예제들, tree.base.DecisionTree.predict Python 예제들

예제 #1

0

파일 보기

파일: ADABoost.py 프로젝트: rohitshantarampatil/Machine-Learning-Course-Assignments

 def fit(self, X, y):
     """Fit the boosted ensemble of depth-1 decision trees.

     Each round fits a ``DecisionTree("information_gain", max_depth=1)`` on
     the full data with the current sample weights, stores it in
     ``self.estimators_list``, stores its weight in ``self.alphas``, and
     re-weights the samples so misclassified rows count more next round.

     Args:
         X: Feature table; rows are read with ``X.iloc[[i]]``.
         y: Labels, indexed positionally as ``y[i]``.
     """
     self.data = X
     self.labels = y
     self.classes = list(set(y))
     n = len(X)
     weights = [1 / n for _ in range(n)]
     # Clamping the weighted error keeps alpha finite when a stump is
     # perfect (err == 0) or wrong on every row (err == 1).
     eps = 1e-10
     for _ in range(self.n_estimators):
         self.clfs.append(X)
         self.clfsy.append(y)
         Dtree = DecisionTree("information_gain", max_depth=1)
         Dtree.fit(X, y, sample_weights=weights)
         self.estimators_list.append(Dtree)
         # Predict every row once; the result drives both the error and
         # the weight update below.
         missed = [
             bool(Dtree.predict(X.iloc[[i]]) != y[i]) for i in range(n)
         ]
         err = sum(w for w, wrong in zip(weights, missed) if wrong)
         err = min(max(err, eps), 1 - eps)
         # Natural log, so that it matches the exp() used for re-weighting.
         alpha = 0.5 * math.log((1 - err) / err)
         self.alphas.append(alpha)
         grow, shrink = math.exp(alpha), math.exp(-alpha)
         weights = [
             w * (grow if wrong else shrink)
             for w, wrong in zip(weights, missed)
         ]
         # Normalise the weights (sum computed once, not per element)
         total = sum(weights)
         weights = [w / total for w in weights]

예제 #2

0

파일 보기

def find_Time(case):
    """Time DecisionTree fit and predict over a sweep of N and a sweep of P.

    The first sweep fixes P = 5 and varies N over range(100, 500, 20); the
    second fixes N = 100 and varies P over range(2, 24, 2).

    Args:
        case: Forwarded unchanged to ``CreateFakeData``.

    Returns:
        (axis_Nf, axis_Np, axis_Pf, axis_Pp): fit and predict durations for
        the N sweep (lists of length 25; slots past the sweep stay 0) and for
        the P sweep (lists of length 11).
    """
    axis_Nf = [0] * 25  # fit time per N, with P fixed
    axis_Np = [0] * 25  # predict time per N, with P fixed
    axis_Pf = [0] * 11  # fit time per P, with N fixed
    axis_Pp = [0] * 11  # predict time per P, with N fixed
    print("Started 1")
    for i in range(100, 500, 20):
        X, y = CreateFakeData(i, 5, case)  # we fix p = 5
        # Fit and predict timings for the same N must land in the same slot.
        slot = (i - 100) // 20
        axis_Nf[slot], axis_Np[slot] = _time_fit_predict(X, y)
    print("Started 2")
    for i in range(2, 24, 2):
        X, y = CreateFakeData(100, i, case)
        slot = (i - 2) // 2
        axis_Pf[slot], axis_Pp[slot] = _time_fit_predict(X, y)
    return axis_Nf, axis_Np, axis_Pf, axis_Pp


def _time_fit_predict(X, y):
    """Return (fit_seconds, predict_seconds) for a fresh DecisionTree on X, y."""
    mod = DecisionTree()
    st1 = time()
    mod.fit(X, y)
    ed1 = time()
    st2 = time()
    mod.predict(X)
    ed2 = time()
    return ed1 - st1, ed2 - st2

예제 #3

0

파일 보기

파일: iris-experiments.py 프로젝트: rohitshantarampatil/Machine-Learning-Course-Assignments

def nested_cross_validation(dataset, y):
    """Run nested cross-validation and print one result line per outer fold.

    Outer loop: five folds, each holding out one 30-row chunk for testing.
    Inner loop: for every depth 1..10, four 30-row validation folds over the
    remaining rows give an average accuracy; the depth with the highest
    average is refit on all remaining rows and scored on the held-out chunk.

    Args:
        dataset: Feature rows, sliced positionally.
        y: Labels aligned with ``dataset``.
    """
    for i in range(5):
        lo, hi = 30 * i, 30 * (i + 1)
        test = dataset[lo:hi]
        test_label = y[lo:hi]
        if hi + 120 <= 150:
            # Held-out chunk is the first one: the rest is a plain slice.
            train = dataset[hi:]
            train_label = y[hi:]
        else:
            # Stitch together the rows before and after the held-out chunk.
            train = np.append(dataset[0:lo], dataset[hi:], axis=0)
            train_label = np.append(y[0:lo], y[hi:], axis=0)

        accuracy_validation = {}
        for depth in range(1, 11):
            running = 0
            for j in range(4):
                a, b = 30 * j, 30 * (j + 1)
                validation = train[a:b]
                validation_label = train_label[a:b]
                # Rows after the validation chunk come first, then the rows
                # before it.
                fit_rows = np.append(train[b:], train[0:a], axis=0)
                fit_labels = np.append(train_label[b:], train_label[0:a],
                                       axis=0)
                tree = DecisionTree(criterion="gini_index", max_depth=depth)
                fit_rows = pd.DataFrame(fit_rows)
                fit_labels = pd.Series(fit_labels, dtype="category")
                fit_rows.reset_index(drop=True, inplace=True)
                fit_labels.reset_index(drop=True, inplace=True)
                tree.fit(fit_rows, fit_labels)
                running += accuracy(tree.predict(validation),
                                    validation_label)
            accuracy_validation[depth] = running / 4

        # First depth reaching the highest average validation accuracy.
        value = max(accuracy_validation, key=accuracy_validation.get)
        tree = DecisionTree(criterion="gini_index", max_depth=value)
        train = pd.DataFrame(train)
        train_label = pd.Series(train_label, dtype="category")
        tree.fit(train, train_label)
        fold_accuracy = accuracy(tree.predict(test), test_label)
        print("Accuracy is,", fold_accuracy, " for iteration", i + 1,
              ". The depth of the optimal tree is ", value)

예제 #4

0

파일 보기

파일: iris-experiments.py 프로젝트: rohitshantarampatil/Machine-Learning-Course-Assignments

def cross_validtion_5_fold(X, y, depth):
    """Print the accuracy of five train/test splits built from 30-row chunks.

    Each split fits a fresh ``DecisionTree(criterion="a", max_depth=depth)``
    on the rows outside one chunk and prints its accuracy on that chunk.
    Held-out chunks, in order: rows 120 onward, 60-90, 90-120, 0-30, 30-60.

    Args:
        X: Feature rows, sliced positionally.
        y: Labels aligned with ``X``.
        depth: Maximum depth handed to every tree.
    """

    def run_split(fit_rows, fit_labels, held_out_rows, held_out_labels):
        # Fit on one split and print the accuracy on its held-out chunk.
        model = DecisionTree(criterion="a", max_depth=depth)
        model.fit(pd.DataFrame(fit_rows),
                  pd.Series(fit_labels, dtype="category"))
        guesses = model.predict(held_out_rows)
        print(accuracy(pd.Series(guesses), pd.Series(held_out_labels)))

    # Only this first split hands predict() a DataFrame; the others pass the
    # raw slice of X.
    run_split(X[0:120], y[0:120], pd.DataFrame(X[120:]), y[120:])

    run_split(np.append(X[90:], X[0:60], axis=0),
              np.append(y[90:], y[0:60], axis=0),
              X[60:90], y[60:90])

    run_split(np.append(X[120:], X[0:90], axis=0),
              np.append(y[120:], y[0:90], axis=0),
              X[90:120], y[90:120])

    run_split(X[30:], y[30:], X[0:30], y[0:30])

    run_split(np.append(X[0:30], X[60:], axis=0),
              np.append(y[0:30], y[60:], axis=0),
              X[30:60], y[30:60])

예제 #5

0

파일 보기

def analyseTime(case):
    """Measure DecisionTree fit and predict times over a grid of N and P.

    For N in 40..49 and P in 2..9, fake data is created, a depth-3
    information-gain tree is fit and then asked to predict on the same data.
    Both durations are recorded and finally handed to ``plotTimings``.

    Args:
        case: Integer from 1 to 4 (asserted), forwarded to ``createFakeData``.
    """
    assert (1 <= case <= 4)
    columns = ('N', 'P', 'time')
    fitTimes = {column: [] for column in columns}
    predictTimes = {column: [] for column in columns}

    def record(table, n_rows, n_cols, seconds):
        # Append one measurement to a timing table.
        table['N'].append(n_rows)
        table['P'].append(n_cols)
        table['time'].append(seconds)

    for N in range(40, 50):
        for P in range(2, 10):
            print("Running with N", N, "and P", P)
            X, y = createFakeData(N, P, case)
            tree = DecisionTree(criterion="information_gain", max_depth=3)

            began = time.time()
            tree.fit(X, y)
            finished = time.time()
            record(fitTimes, N, P, finished - began)

            began = time.time()
            tree.predict(X)
            finished = time.time()
            record(predictTimes, N, P, finished - began)

    plotTimings(fitTimes)
    plotTimings(predictTimes)

예제 #6

0

파일 보기

파일: iris-experiments.py 프로젝트: kshru9/Decision-Trees-fromScratch

def five_fold_validation(X, y, depth=5):
    """Run five-fold cross validation on iris and print the accuracies.

    The rows are treated as 30-row chunks. Each fold fits a fresh
    information-gain tree on the rows outside one chunk and scores it on that
    chunk. The individual accuracies and their mean are printed.

    Args:
        X: Feature rows, sliced positionally.
        y: Labels aligned with ``X``.
        depth: Maximum depth handed to every tree.
    """

    def score_fold(fit_rows, fit_labels, held_out_rows, held_out_labels):
        # Accuracy of a tree fit on one split, measured on its held-out chunk.
        model = DecisionTree(criterion="information_gain", max_depth=depth)
        model.fit(pd.DataFrame(fit_rows),
                  pd.Series(fit_labels, dtype="category"))
        guesses = model.predict(pd.DataFrame(held_out_rows))
        return accuracy(pd.Series(guesses), pd.Series(held_out_labels))

    accs = [
        # rows 120 onward held out
        score_fold(X[0:120], y[0:120], X[120:], y[120:]),
        # rows 60-90 held out
        score_fold(np.append(X[90:], X[0:60], axis=0),
                   np.append(y[90:], y[0:60], axis=0),
                   X[60:90], y[60:90]),
        # rows 90-120 held out
        score_fold(np.append(X[120:], X[0:90], axis=0),
                   np.append(y[120:], y[0:90], axis=0),
                   X[90:120], y[90:120]),
        # rows 0-30 held out
        score_fold(X[30:], y[30:], X[0:30], y[0:30]),
        # rows 30-60 held out
        score_fold(np.append(X[0:30], X[60:], axis=0),
                   np.append(y[0:30], y[60:], axis=0),
                   X[30:60], y[30:60]),
    ]

    print("Individual Accuracies:")
    print(*accs)
    print("Average Accuracy:")
    print(sum(accs) / 5)

예제 #7

0

파일 보기

def my_regr(X, y, max_depth=5, criterion="information_gain"):
    """Fit the decision tree on the estate data and print RMSE then MAE.

    The first 330 rows train the tree; every row from 330 onward is used
    for evaluation.

    Args:
        X: Feature rows, sliced positionally.
        y: Targets aligned with ``X``.
        max_depth: Maximum depth handed to the tree.
        criterion: Split criterion handed to the tree.
    """
    model = DecisionTree(criterion=criterion, max_depth=max_depth)
    model.fit(pd.DataFrame(X[0:330]), pd.Series(y[0:330]))

    held_out_targets = y[330:]
    predictions = model.predict(pd.DataFrame(X[330:]))
    held_out_targets = pd.Series(held_out_targets)

    for metric in (rmse, mae):
        print(metric(predictions, held_out_targets))

예제 #8

0

파일 보기

파일: iris-experiments.py 프로젝트: kshru9/Decision-Trees-fromScratch

def train_and_predict(X, y, max_depth=15):
    """Fit the decision tree on iris and print accuracy, precision and recall.

    The first 120 rows train an information-gain tree; the rows from 120
    onward are used for evaluation. Precision and recall are printed for
    every class present in the evaluation labels.

    Args:
        X: Feature rows, sliced positionally.
        y: Labels aligned with ``X``.
        max_depth: Maximum depth handed to the tree.
    """
    model = DecisionTree(criterion="information_gain", max_depth=max_depth)
    model.fit(pd.DataFrame(X[0:120]), pd.Series(y[0:120], dtype="category"))

    held_out = y[120:]
    y_hat = model.predict(pd.DataFrame(X[120:]))

    print("Accuracy", accuracy(pd.Series(y_hat), pd.Series(held_out)))

    held_out = pd.Series(held_out)
    for label in held_out.unique():
        print('Precision: ', label, " : ", precision(y_hat, held_out, label))
        print('Recall: ', label, " : ", recall(y_hat, held_out, label))

예제 #9

0

파일 보기

파일: iris-experiments.py 프로젝트: ES654/assignment-1-yadavsunny05

def nested_cross(data, y, k1=5, k2=4):
    """Nested cross-validation: pick the best tree depth for each outer fold.

    For each of the ``k1`` outer folds, the remaining rows are split into
    ``k2`` inner folds. Every depth in 2..9 gets the mean inner-fold accuracy
    of an information-gain tree, and the best accuracy and its depth are
    printed.

    Args:
        data: Feature rows, sliced positionally.
        y: Labels aligned with ``data``.
        k1: Number of outer folds.
        k2: Number of inner (validation) folds.
    """
    depths = range(2, 10)
    val1 = len(data) // k1
    for i in range(k1):
        y_test = y[val1 * i:val1 * (i + 1)]
        x_test = data[val1 * i:val1 * (i + 1)]
        x_train = np.append(data[0:val1 * i], data[val1 * (i + 1):], axis=0)
        y_train = np.append(y[0:val1 * i], y[val1 * (i + 1):], axis=0)
        # Inner fold size depends only on the outer split, so compute it once.
        val2 = len(x_train) // k2
        acc = []
        for depth in depths:
            s = 0
            # Iterate over k2 folds (not a fixed 4) so the parameter is honoured.
            for j in range(k2):
                x_val_test = x_train[val2 * j:val2 * (j + 1)]
                y_val_test = y_train[val2 * j:val2 * (j + 1)]
                x_val_train = np.append(x_train[0:val2 * j],
                                        x_train[val2 * (j + 1):],
                                        axis=0)
                y_val_train = np.append(y_train[0:val2 * j],
                                        y_train[val2 * (j + 1):],
                                        axis=0)
                tree = DecisionTree("information_gain", max_depth=depth)
                x_val_train = pd.DataFrame(x_val_train)
                y_val_train = pd.DataFrame(y_val_train)
                x_val_test = pd.DataFrame(x_val_test)
                y_val_test = pd.DataFrame(y_val_test)
                # NOTE(review): these set a plain `dtype` attribute on the
                # frames; presumably DecisionTree reads it - confirm.
                x_val_train.dtype = "sda"
                y_val_train.dtype = "category"
                x_val_test.dtype = "sda"
                y_val_test.dtype = "category"
                tree.fit(x_val_train, y_val_train)
                s += (accuracy(np.array(y_val_test),
                               np.array(tree.predict(x_val_test))))
            acc.append(s / k2)
        value = max(acc)
        # Map the winning position back to the depth it was measured at.
        best_depth = depths[acc.index(value)]
        # The tree takes the best depth, not the best accuracy. It is only
        # constructed here, as in the original; it is not fit or returned.
        tree = DecisionTree("information_gain", max_depth=best_depth)
        print("Best Accuracy is : - " + str(value))
        print("At Depth : - " + str(best_depth))

예제 #10

0

파일 보기

파일: iris-experiments.py 프로젝트: ES654/assignment-1-CM747

# Train/test split: the first 70% of the rows train the tree, the rest test it.
# NOTE(review): `train_test_split` here is an integer row index; if sklearn's
# function of the same name is imported elsewhere in this file, this shadows it.
train_test_split = int(0.7*len(iris_data))

X = X_data.iloc[:train_test_split, :]
X_test = X_data.iloc[train_test_split:, :]
y = y_data.iloc[:train_test_split]
y_test = y_data.iloc[train_test_split:]

# Training and testing: one depth-3 tree per split criterion
for criteria in ['information_gain', 'gini_index']:
    tree = DecisionTree(criterion=criteria, max_depth=3)
    # Build the decision tree
    tree.fit(X, y)
    # Predict on both the training rows and the held-out rows
    y_hat = tree.predict(X)
    y_test_hat = tree.predict(X_test)
    tree.plot()
    print('Criteria :', criteria)
    print('Train Accuracy: ', accuracy(y_hat, y))
    print('Test Accuracy: ', accuracy(y_test_hat, y_test))
    # Precision and recall on the test rows, for each class seen in training
    for cls in y.unique():
        print("Class =",cls)
        print('Precision: ', precision(y_test_hat, y_test, cls))
        print('Recall: ', recall(y_test_hat, y_test, cls))


####################################################################################

# 5 fold cross-validation

예제 #11

0

파일 보기

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from metrics import *

# Fixed seed so the random dataset below is the same on every run.
np.random.seed(42)

N = 30  # number of rows in X and y
P = 5  # random integers are drawn from 0..P-1
# Five categorical feature columns (keys 0..4) of N random integers each.
X = pd.DataFrame({
    i: pd.Series(np.random.randint(P, size=N), dtype="category")
    for i in range(5)
})
# Categorical target drawn the same way as the features.
y = pd.Series(np.random.randint(P, size=N), dtype="category")

print('\n\n##Discrete Input and Discrete Output##')
for criteria in ['information_gain']:
    tree = DecisionTree(criterion=criteria,
                        max_depth=np.inf)  # Split on information gain, no depth limit
    tree.fit(X, y)
    # Predictions are made on the training data itself.
    y_hat = tree.predict(X)
    tree.plot()
    print('Criteria :', criteria)
    print('Accuracy: ', accuracy(y_hat, y))
    # Per-class precision and recall
    for cls in y.unique():
        print(cls)
        print('Precision: ', precision(y_hat, y, cls))
        print('Recall: ', recall(y_hat, y, cls))

예제 #12

0

파일 보기

파일: iris-experiments.py 프로젝트: jatinkumar762/MachineLearning

np.random.seed(42)

# Read the IRIS data set, fit one tree on a train split and report its metrics.

tree = DecisionTree(criterion='information_gain',max_depth=10) # Split on information gain
# Attributes set on the tree before training.
# NOTE(review): presumably these tell the tree to treat features as real-valued
# and the target as categorical - confirm against DecisionTree.
tree.output="category"
tree.input="real"
df=pd.read_csv("iris.data",names=['sepal_length','sepal_width','petal_length','petal_width','label'])
train_data,test_data=tree.train_test_split(df)
# The tree is built by decision_tree_algorithm and attached by hand rather
# than through a fit() call.
sub_tree = tree.decision_tree_algorithm(train_data)
tree.tree=sub_tree
rows,colums=test_data.values.shape
# Every column but the last is a feature; the last column is the label.
y_hat = tree.predict(test_data.iloc[:,0:colums-1])
y= test_data.iloc[:,-1]
print('Accuracy: ', accuracy(y_hat, y))
# Per-class precision and recall on the test split
for cls in y.unique():
    print('Class Name: ',cls)
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
    print()


index=df.index.tolist()
# NOTE(review): `len` rebinds the builtin len() for the rest of the module;
# any later len(...) call will fail - consider renaming.
len,_=df.values.shape
#print(len)
# 20% of the rows
test_size=int(len*0.2)
for i in range(5):

예제 #13

0

파일 보기

파일: iris-experiments.py 프로젝트: rohitshantarampatil/Machine-Learning-Course-Assignments

# Read IRIS data set
dataset = load_iris()
X, y = dataset.data, dataset.target

# Rows are used in their stored order; uncomment to shuffle them first.
#from sklearn.utils import shuffle
#X, y = shuffle(X, y, random_state=0)

# The first 120 rows train the tree and the rows from 120 onward are held
# out. With the 150-row iris data that is an 80-20 split, so the message
# says 80-20 rather than 70-30.
print("fit model for iris dataset for 80-20 division")

clf = DecisionTree(criterion="a", max_depth=5)
clf.fit(pd.DataFrame(X[0:120]), pd.Series(y[0:120], dtype="category"))
# From here on `y` holds only the held-out labels.
y = y[120:]
y_hat = clf.predict(pd.DataFrame(X[120:]))
print("Accuracy", accuracy(pd.Series(y_hat), pd.Series(y)))
y = pd.Series(y)

# Per-class precision and recall on the held-out rows
for cls in y.unique():
    print('Precision: for class ', cls, " : ", precision(y_hat, y, cls))
    print('Recall: ', cls, " : ", recall(y_hat, y, cls))


def cross_validtion_5_fold(X, y, depth):
    X_original = X
    y_original = y

    clf = DecisionTree(criterion="a", max_depth=depth)
    clf.fit(pd.DataFrame(X[0:120]), pd.Series(y[0:120], dtype="category"))
    y = y[120:]

예제 #14

0

파일 보기

파일: estate-experiments.py 프로젝트: ES654/assignment-1-yadavsunny05

# Read real-estate data set
# NOTE(review): absolute, machine-specific path - the script only runs where
# this file exists.
data = pd.read_csv(r'C:\Users\Anshuman Yadav\Documents\Real.csv')
# Features are every column except the first and the last; the target is the
# last column.
X_train, X_test, Y_train, Y_test = train_test_split(data[data.columns[1:-1]],
                                                    data[data.columns[-1]])
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
Y_train = Y_train.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)
# NOTE(review): sets a plain `dtype` attribute on the frames; presumably
# DecisionTree reads it - confirm.
X_train.dtype = "d"
X_test.dtype = "d"

# Own implementation
tree = DecisionTree("ad", max_depth=25)
tree.fit(X_train, Y_train)
y_pred = tree.predict(X_test)
print("MAE my tree : -")
print(mae(np.array(Y_test), np.array(y_pred)))
# The value printed comes from rmse(), so the label says RMSE.
print("RMSE my tree : -")
print(rmse(np.array(Y_test), np.array(y_pred)))

# Reference regressor fit on the same split, scored with the same metrics
d_tree_sklearn = tree5.DecisionTreeRegressor()
d_tree_sklearn = d_tree_sklearn.fit(X_train, Y_train)
y_sklearn = d_tree_sklearn.predict(X_test)
print("MAE sklearn : -")
print(mae(np.array(Y_test), np.array(y_sklearn)))
print("RMSE sklearn : -")
print(rmse(np.array(Y_test), np.array(y_sklearn)))

예제 #15

0

파일 보기

파일: iris-experiments.py 프로젝트: ES654/assignment-1-Decision-tree-and-Ensemble-Learning

# Fixed seed so the shuffle below is the same on every run.
np.random.seed(42)

# Read IRIS data set, shuffle the rows and split them 70/30 by position.
iris = pd.read_csv('iris.csv')
iris = iris.sample(frac=1).reset_index(drop=True)
split_at = int(0.7 * (iris.shape[0]))
# Every column but the last is a feature; the last column is the label.
X_train = iris.iloc[:split_at, :-1]
y_train = iris.iloc[:split_at, -1]
X_test = iris.iloc[split_at:, :-1]
y_test = iris.iloc[split_at:, -1]
# Tree built with DecisionTree's default arguments.
model = DecisionTree()
model.fit(X_train, y_train)
y_out = model.predict(X_test)
print("Accuracy is: ", accuracy(y_out, y_test))
# Per-class precision and recall over the classes present in the test labels
for group in np.unique(y_test):
    print("Precision of {} is: {}".format(group,
                                          precision(y_out, y_test, group)))
    print("Recal of {} is: {}".format(group, recall(y_out, y_test, group)))

# Accuracy of all five models: each fold holds out one contiguous 20% slice.
fold = int(0.2 * (iris.shape[0]))
for i in range(5):
    n_split1 = i * fold
    n_split2 = n_split1 + fold
    X_test1 = iris.iloc[n_split1:n_split2, :-1].reset_index(drop=True)
    y_test1 = pd.Series(list(iris.iloc[n_split1:n_split2, -1]))
    # Training features are the rows before and after the held-out slice.
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    X_train1 = pd.concat([
        iris.iloc[:n_split1, :-1],
        iris.iloc[n_split2:, :-1],
    ]).reset_index(drop=True)

예제 #16

0

파일 보기

파일: estate-experiments.py 프로젝트: varunjain3/DecisionTree

# Preprocessing
# Features are every column but the last; the target is the last column.
X = shuffled.iloc[:, :-1].squeeze()
y = (shuffled.iloc[:, -1:]).T.squeeze()
len_estate = len(y)

# Splitting data
# NOTE(review): `.loc` slices by index label and includes the end label, so
# this presumably relies on `shuffled` having a 0..n-1 index - confirm it was
# reset after shuffling.
X_train, y_train = X.loc[:split*len_estate], y.loc[:split*len_estate]
X_test, y_test = X.loc[split*len_estate+1:].reset_index(
    drop=True), y.loc[split*len_estate+1:].reset_index(drop=True)

# Learning tree
print("Please wait for some time, it takes time, you can change max depth if it takes too long time.")
tree = DecisionTree(criterion="information_gain", max_depth=max_depth)
tree.fit(X_train, y_train)
tree.plot()

# Printing errors for different depths
# The tree is fit once above; each iteration only passes a different
# `max_depth` to predict().
for depth in range(2, max_depth+1):
    y_hat = tree.predict(X_test, max_depth=depth)
    print("Depth: ", depth)
    print('\tRMSE: ', rmse(y_hat, y_test))
    print('\tMAE: ', mae(y_hat, y_test))

# Decision Tree Regressor from scikit-learn, scored with the same metrics
dt = DecisionTreeRegressor(random_state=0)
dt.fit(X_train, y_train)
y_hat = pd.Series(dt.predict(X_test))

print('Sklearn RMSE: ', rmse(y_hat, y_test))
print('Sklearn MAE: ', mae(y_hat, y_test))

예제 #17

0

파일 보기

import numpy as np
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from metrics import *
from sklearn.tree import DecisionTreeRegressor

# Fixed seed so the shuffle below is the same on every run.
np.random.seed(42)

# Read real-estate data set
# NOTE(review): `pd` is used here but no `import pandas as pd` is visible in
# the import lines above - confirm it is imported.
estate = pd.read_csv('Real_estate.csv', index_col='No', dtype=float)
estate = estate.sample(frac=1).reset_index(drop=True)
# The first 30% of the shuffled rows train the models; the remaining 70% test
# them. NOTE(review): confirm this ratio is intended and not inverted.
split_at = int(0.3 * (estate.shape[0]))
# Every column but the last is a feature; the last column is the target.
X_train = estate.iloc[:split_at, :-1]
y_train = estate.iloc[:split_at, -1]
X_test = estate.iloc[split_at:, :-1]
y_test = estate.iloc[split_at:, -1]

# Own implementation, depth 2
model = DecisionTree(max_depth=2)
model.fit(X_train, y_train)
y_out = model.predict(X_test)
print("Rmse is: ", rmse(y_out, y_test))
print("Mae is: ", mae(y_out, y_test))

# scikit-learn regressor at the same depth; `y_out` is reused for its output
model2 = DecisionTreeRegressor(max_depth=2)
model2.fit(X_train, y_train)
y_out = model2.predict(X_test)
print("Rmse of Sklearn is: ", rmse(y_out, y_test))
print("Mae of Sklearn is: ", mae(y_out, y_test))

예제 #18

0

파일 보기

# Positional split at row `train_test_split`: every column but the last is a
# feature, the last column is the target.
X = data.iloc[:train_test_split, :-1]
X_test = data.iloc[train_test_split:, :-1]
y = data.iloc[:train_test_split, -1]
y_test = data.iloc[train_test_split:, -1]


# Depth limit shared by both models below.
maxdepth = 4

# Building the decision tree with my model
criteria = 'information_gain'
mytree = DecisionTree(criterion=criteria, max_depth=maxdepth) # Split on information gain
mytree.fit(X, y)
mytree.plot()

print("My Model")
# Errors on the training rows
y_hat = mytree.predict(X)
print("Train Scores:")
print('\tRMSE: ', rmse(y_hat, y))
print('\tMAE: ', mae(y_hat, y))

# Errors on the held-out rows
y_test_hat = mytree.predict(X_test)
print("Test Scores:")
print('\tRMSE: ', rmse(y_test_hat, y_test))
print('\tMAE: ', mae(y_test_hat, y_test))

###################################################################################

# Building the decision tree with sklearn
# NOTE(review): `tree` is presumably the sklearn.tree module - confirm the import.
print("Sklearn Model")
clf = tree.DecisionTreeRegressor(max_depth=maxdepth)
clf = clf.fit(X,y)

예제 #19

0

파일 보기

파일: q1_ADABoost.py 프로젝트: rkreddy99/Implementation-of-Adaboost-Bagging-Random-Forest.

print("-----------------------------------------------------------")
print("Decision stump on random data")
print("-----------------------------------------------------------")
N = 30  # number of rows
P = 2  # number of feature columns
NUM_OP_CLASSES = 2  # labels are drawn from 0..NUM_OP_CLASSES-1
n_estimators = 3  # number of estimators passed to AdaBoost below
# Non-negative real features: absolute values of standard-normal draws.
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

criteria = 'information_gain'
# NOTE(review): no max_depth is passed here although the banner calls this a
# stump - confirm DecisionTree's default depth.
tree = DecisionTree(criterion=criteria)
re = X.shape[0]
# Uniform sample weights, one per row, summing to 1.
img_weights = [1 / re] * re
tree.fit(X, y, img_weights)
# Predictions are made on the training data itself.
yhat = pd.Series(tree.predict(X))
print('Criteria :', criteria)
print('Accuracy: ', accuracy(yhat, y))
# Per-class precision and recall
for cls in y.unique():
    print("***Class :" + str(cls) + "***")
    print('Precision: ', precision(yhat, y, cls))
    print('Recall: ', recall(yhat, y, cls))

print("-----------------------------------------------------------")
print("Adaboost on random data")
print("-----------------------------------------------------------")

# The tree fit above is handed to AdaBoost as its base estimator.
Classifier_AB = AdaBoostClassifier(base_estimator=tree,
                                   n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)

예제 #20

0

파일 보기

# Collapse the labels to a two-class problem: virginica vs. everything else.
for i in range(len(y)):
    if (y[i]!='Iris-virginica'):
        y[i] = 'not virginica'
N = len(y)
# The first 60% of the rows train the models; the rest test them.
t = int(np.floor(0.6*N))
X_train = X.iloc[:t,:]
y_train = y[:t]
X_test = X.iloc[t:,:]
# Rebuilt through a list so the test labels get a fresh 0-based index.
y_test = list(y[t:])
y_test = pd.Series(y_test)
criteria = 'information_gain'
tree = DecisionTree(criterion=criteria,max_depth=1)
Classifier_AB = AdaBoostClassifier(base_estimator=tree, n_estimators=n_estimators )
Classifier_AB.fit(X_train, y_train)
y_hat = Classifier_AB.predict(X_test)
# [fig1, fig2] = Classifier_AB.plot()
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y_test))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y_test, cls))
    print('Recall: ', recall(y_hat, y_test, cls))

print("\nDECISION STUMP")
# One uniform weight per training row, summing to 1. The weights are sized by
# the t training rows rather than the N rows of the whole data set.
tree.fit(X_train,y_train,np.ones(t)/t)
y_hat = tree.predict(X_test)
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y_test))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y_test, cls))
    print('Recall: ', recall(y_hat, y_test, cls))