Example #1
 def fit(self, X, y):
     # Grow the forest: each tree is trained on its own bootstrap sample
     self.trees = []
     for _ in range(self.n_trees):
         tree = Decision_Tree(min_samples_split=self.min_sample_split,
                              max_depth=self.max_depth,
                              n_feats=self.n_feats)
         X_sample, y_sample = bootstrap_sample(X, y)
         tree.fit(X_sample, y_sample)
         self.trees.append(tree)
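The fit above leans on a bootstrap_sample helper that the snippet does not show. A minimal sketch of what it presumably does, assuming NumPy arrays and sampling with replacement:

import numpy as np

def bootstrap_sample(X, y):
    # Draw len(X) row indices with replacement (a standard bootstrap),
    # so each tree trains on a slightly different view of the data
    n_samples = X.shape[0]
    idxs = np.random.choice(n_samples, size=n_samples, replace=True)
    return X[idxs], y[idxs]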
Example #2
def decision_tree_test():
    # Smoke test: fit a regression tree on the Boston housing data
    data = load_boston()
    X = data.data
    Y = data.target
    # Keep a hand-picked subset of the 13 features
    X = X[:, [0, 2, 4, 5, 7, 11]]
    dt = Decision_Tree(max_depth=5)
    dt.fit(X, Y)
    # Predict the last 20 rows
    Y_pre = dt.predict(X[-20:])

    return Y_pre
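Note that load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this test no longer runs on current versions. A hedged variant of the same smoke test using fetch_california_housing instead, assuming the same Decision_Tree regressor interface:

from sklearn.datasets import fetch_california_housing

def decision_tree_test():
    data = fetch_california_housing()
    X, Y = data.data, data.target
    dt = Decision_Tree(max_depth=5)
    dt.fit(X, Y)
    # Predict the last 20 rows as a quick sanity check
    return dt.predict(X[-20:])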
Example #3
 def fit(self, X, Y):
     # Train each decision tree in a loop
     self.tree = []
     for s_t in range(self.n_trees):
         # Bootstrap-sample the training data for this tree
         X_Sample, Y_Sample = boost_trap(X, Y)
         single_tree = Decision_Tree(max_depth=self.max_depth,
                                     classifier=True,
                                     Loss="Gini")
         single_tree.fit(X_Sample, Y_Sample)
         self.tree.append(single_tree)
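The matching predict for this forest is not shown; a minimal sketch, assuming non-negative integer class labels and a majority vote across the trees stored in self.tree:

import numpy as np

def predict(self, X):
    # One row of predictions per tree: shape (n_trees, n_samples)
    tree_preds = np.array([t.predict(X) for t in self.tree])
    # Majority vote per sample across the tree axis
    return np.array([np.bincount(col).argmax() for col in tree_preds.T])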
Example #4
    def decision_tree(self, training_data, training_labels, testing_data):

        # Create and build the decision tree
        tree = Decision_Tree(training_data, training_labels)

        tree.print_tree_dfs()

        # Test for when we encounter a new category not seen before in testing
        # test = ["low", "high", "high", "high", "high", "high", "high", "potato"]
        # print(f"YEET: {tree.predict(test)}")

        # Predict each testing sample with the trained tree
        predictions = [tree.predict(sample) for sample in testing_data]

        return predictions
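If true labels for testing_data are at hand, the returned predictions can be scored directly. A small usage sketch (model and testing_labels are hypothetical names, not part of the original snippet):

# model and testing_labels are hypothetical, for illustration only
predictions = model.decision_tree(training_data, training_labels, testing_data)
correct = sum(p == t for p, t in zip(predictions, testing_labels))
print("Accuracy: {:.2%}".format(correct / len(testing_labels)))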
Example #5
from decision_tree import Decision_Tree
from sklearn.datasets import load_iris
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn import tree


# Test w/ Iris dataset using my class
dataset = load_iris()
X, y = dataset.data, dataset.target
clf_iris = Decision_Tree(max_depth=5)
# Collapse the target to binary string labels ("one" vs "zero") to exercise the encoder below
y = ["one" if val == 1 or val == 2 else "zero" for val in y]
y = np.array(y)
# Need to ordinally encode strings to integers
if "int" not in str(y.dtype):
    # Reshape y array so it works w/ ordinal encoder
    y = y.reshape(-1, 1)
    encoder = OrdinalEncoder()
    y = encoder.fit_transform(y)
y = y.astype(int)
y = y.reshape(y.size,)

clf_iris.fit(X, y)
temp1 = np.array([[3, 2, 1, .5]])
temp2 = np.array([[4, 2.9, 1.3, .2]])
temp3 = np.array([[3.8, 3, 1.4, .4]])
temp4 = np.array([[7.7, 2.8, 6.7, 2]])
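The script builds temp1 through temp4 but never uses them; presumably they were probes for spot-checking single predictions. A hedged continuation, assuming clf_iris.predict accepts 2-D arrays shaped like the training data:

for probe in (temp1, temp2, temp3, temp4):
    # Each probe is a single (1, 4) iris-like sample
    print(clf_iris.predict(probe))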
Example #6
from read_file import Read_File
from data_processor import Data_Processor
from svm import SVM
from decision_tree import Decision_Tree

fileName = 'data/census-income_10percentData.csv'

file_reader = Read_File(fileName)
file_reader.read()

features = file_reader.get_features()
labels = file_reader.get_labels()

data_fill = Data_Processor(features)
data_fill.fill_empty_fields()

# Run the SVM pipeline: information gain, stratified 10-fold CV, fit, and plot
my_svm = SVM(features, labels)
my_svm.calculate_info_gain()
my_svm.stratified_k_fold(10)
my_svm.svm()
my_svm.draw_svm()

my_tree = Decision_Tree(features, labels)
my_tree.calculate_info_gain()
for i in range(1, 14):
    print "============================{}============================".format(
        i)
    my_tree.decision(i)
Example #7
def decisionTree_class():
    # Synthetic multiclass data: 100 samples, 10 blob centers, 10 features
    X, Y = make_blobs(n_samples=100, centers=10, n_features=10, random_state=5)
    dt = Decision_Tree(max_depth=5, classifier=True, Loss="Gini")
    dt.fit(X, Y)
    # Predict back on the training data
    Y_P = dt.predict(X)
    return Y_P, Y
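A quick way to sanity-check the output is to compare the predictions against the true labels; note this is training accuracy, since the tree predicts on the same data it was fit on:

import numpy as np

Y_P, Y = decisionTree_class()
print("Training accuracy:", np.mean(Y_P == Y))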
Example #8
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from decision_tree import Decision_Tree


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


data = datasets.load_breast_cancer()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)

clf = Decision_Tree(max_depth=10)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
acc = accuracy(y_test, y_pred)

print("Accuracy:", acc)
Example #9
                {
                    # Choosing month of date
                    "date": row[0].split("/")[0],
                    # log10 of confirmed
                    "confirmed": int(math.log10(int(row[1]))),
                    # log10 of recovered
                    "recovered": int(math.log10(int(row[2]))),
                    # log10 of deaths
                    "deaths": int(math.log10(int(row[3]))),
                },
                discretise_target(row[4]))
            examples.append(data)

    # Shuffling for randomness
    random.shuffle(examples)
    # Split the shuffled data 80/20 into training (tre) and testing (tee)
    split_index = int(0.8 * len(examples))
    tre = examples[:split_index]
    tee = examples[split_index:]

    decision_tree = Decision_Tree(tre, depth, pruning)
    print("DECISION TREE")
    print(decision_tree)
    print("MAXIMUM DEPTH REACHED")
    print(decision_tree.depth_reached)
    print("ACCURACY OVER TESTING")
    print(decision_tree.test_accuracy(tee) * 100, "%")
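discretise_target is called on row[4] but never shown. One plausible, purely hypothetical implementation, assuming it mirrors the log10 bucketing applied to the features above:

import math

def discretise_target(value):
    # Hypothetical: map a raw count onto its order of magnitude,
    # guarding against zero so log10 stays defined
    return int(math.log10(max(int(value), 1)))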