def learn_with_purity(purity):  # train a decision tree with a given purity stopping threshold and return its test predictions
    dtree = DecisionTree(x_train, y_train, max_depth=n_attr, purity=purity)
    dtree.fit()
    # train_accuracy = dtree.accuracy(x_train, y_train)
    # test_accuracy = dtree.accuracy(x_test, y_test)
    test_preds = dtree.predict(x_test)
    return test_preds
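# Usage sketch (not part of the original snippet): sweep a few purity thresholds and
# compare held-out accuracy. Assumes x_test/y_test and numpy as np are available, as
# in the surrounding code; the purity grid below is purely illustrative.
def sweep_purity(purities=(0.80, 0.90, 0.95, 1.00)):
    for purity in purities:
        preds = learn_with_purity(purity)
        acc = np.mean(preds == y_test)
        print(f"purity={purity:.2f}  test accuracy={acc:.3f}")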
Example #2
    def fit(self, X, y):
        '''The common approach is, at each node, to pick d features at random and
        select the one that maximizes the information gain for splitting.
        This is done for performance reasons, and it is particularly useful when the
        number of features in the dataset is very large.
        [In the API of sklearn's DecisionTreeClassifier class, this option is exposed
        through the parameter 'splitter'. In the API of DecisionForest, the attribute
        'max_features' specifies the number of features to consider.]
        However, since the number of features in our dataset was limited to 14, we
        decided to do an exhaustive search over the features at each node.'''
        self.estimators_ = []

        # Russell's method: train each tree on a random subset of roughly 2/3 of the
        # feature columns (a random-subspace-style variant that bags over features)
        if self.russells_method:
            for i in range(self.n_estimators):
                size = int(X.shape[1] / 1.5)
                idxs = np.random.choice(range(X.shape[1]), size, replace=False)
                samples = X[:, idxs]  # keep only the selected feature columns
                tree = DecisionTree(max_depth=4)
                tree.fit(samples, y)
                self.estimators_.append(tree)
            return

        # Standard method
        for i in range(self.n_estimators):
            # draw n row indices WITH replacement when bootstrap is enabled; this
            # resampling ("bootstrapping") gives each tree a slightly different view
            # of the data, which is the bagging step of a random forest
            idxs = np.random.choice(range(X.shape[0]), X.shape[0],
                                    replace=self.bootstrap)
            tree = DecisionTree(max_depth=4)
            tree.fit(X[idxs], y[idxs])
            self.estimators_.append(tree)
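# Sketch of the per-node random feature selection described in the docstring above:
# draw d candidate columns and keep the one with the largest gain. `information_gain`
# is a hypothetical helper passed in by the caller, not a function from this example.
def pick_split_feature(X, y, d, information_gain):
    # sample d distinct candidate feature indices (all of them if d >= n_features)
    candidates = np.random.choice(X.shape[1], size=min(d, X.shape[1]), replace=False)
    gains = [information_gain(X[:, j], y) for j in candidates]
    return candidates[int(np.argmax(gains))]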
def learn_depths():  # train decision trees over a range of depths and record train/test accuracy
    train_acc = np.zeros(n_attr)
    test_acc = np.zeros(n_attr)
    for depth in range(n_attr):
        dtree = DecisionTree(x_train, y_train, max_depth=depth)
        dtree.fit()
        train_acc[depth] = dtree.accuracy(x_train, y_train)
        test_acc[depth] = dtree.accuracy(x_test, y_test)
    df = pd.DataFrame({
        'depth': range(1, n_attr + 1),
        'Train accuracy': train_acc,
        'Test accuracy': test_acc
    })
    # df.to_csv('res/acc.csv')
    return df
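# Usage sketch (illustrative, not from the original snippet): run the depth sweep and
# report the depth with the highest test accuracy from the returned DataFrame.
acc_df = learn_depths()
best_row = acc_df.loc[acc_df['Test accuracy'].idxmax()]
print(f"best depth: {int(best_row['depth'])}, test accuracy: {best_row['Test accuracy']:.3f}")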
Example #4
def k_fold_cross_validation(x, y, k, shf=False):
    if shf:
        # shuffle the rows of x and y together so the folds are randomized
        to_shf = np.column_stack((x, y))
        np.random.shuffle(to_shf)
        x = to_shf[:, :-1]
        y = to_shf[:, -1]
    train_acc = np.zeros((k, n_attr))
    val_acc = np.zeros((k, n_attr))
    for d in range(k):
        print(f"fold {d}...")
        # rows whose index is congruent to d (mod k) form the validation fold; the rest train
        x_train = np.array([row for i, row in enumerate(x) if i % k != d])
        x_val = np.array([row for i, row in enumerate(x) if i % k == d])
        y_train = np.array([val for i, val in enumerate(y) if i % k != d])
        y_val = np.array([val for i, val in enumerate(y) if i % k == d])
        for depth in range(n_attr):
            dtree = DecisionTree(x_train, y_train, max_depth=depth)
            dtree.fit()
            # train_acc[d, depth] = dtree.accuracy(x_train, y_train)
            val_acc[d, depth] = dtree.accuracy(x_val, y_val)
    return val_acc
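# Usage sketch (illustrative): average the validation accuracy over the k folds and
# pick the depth that generalizes best. Assumes the feature matrix x and labels y
# used above are already loaded, and numpy as np is imported.
val_acc = k_fold_cross_validation(x, y, k=10, shf=True)
mean_acc = val_acc.mean(axis=0)
best_depth = int(np.argmax(mean_acc))
print(f"best depth: {best_depth}, mean validation accuracy: {mean_acc[best_depth]:.3f}")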
Example #5
def main():
    parser = argparse.ArgumentParser(description="Train a decision tree on a CSV dataset")
    parser.add_argument("--csv", type=str, help="Path to the CSV data file")
    parser.add_argument(
        "--eval",
        type=str,
        default="gini",
        help="The evaluation function: 'gini' or 'entropy'. Defaults to 'gini'.")
    cli_args = parser.parse_args()

    if cli_args.eval not in ['gini', 'entropy']:
        print("The evaluation function should be 'gini' or 'entropy'")
        exit(1)

    data = pd.read_csv(cli_args.csv)
    train = data.sample(frac=0.75, random_state=0)
    # the remaining rows form the test set: rows sampled into train appear twice in the
    # concatenation and are dropped, leaving only the rows that were never sampled
    test = pd.concat([train, data]).drop_duplicates(keep=False)

    class_weights = {'setosa': 1, 'versicolor': 1, 'virginica': 1}
    # use the impurity measure chosen on the command line (this assumes an `entropy`
    # function is available alongside `gini`; otherwise the --eval flag is never used)
    eval_func = gini if cli_args.eval == 'gini' else entropy
    tree = DecisionTree()
    tree.fit(train, class_weights, eval_func)
    # print(tree._error_rate(tree.root))
    print(tree._count_leaf(tree.root))
    # tree.prune(test, 0.0)

    print(tree.treeToString())

    # classify a new sample with a missing PetalLength value (np.nan)
    data = pd.DataFrame(
        [[5.1, 3.5, np.nan, 1]],
        columns=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'])
    print(tree.classify(data))

    tree.savePDF('output.pdf')
    tree.savePNG('output.png')
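
# Typical invocation (the script name and CSV file below are illustrative assumptions):
#   python main.py --csv iris.csv --eval entropy
if __name__ == '__main__':
    main()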
Example #6
# %%
import numpy as np
from preprocess import get_train_data, get_test_data
from dtree import DecisionTree

x_train, y_train = get_train_data()
x_test, y_test = get_test_data()
decision_tree = DecisionTree(x_train, y_train, max_depth=1)
decision_tree.fit()
decision_tree.traverse()
y_hat = decision_tree.predict(x_test)
print("accuracy: ", decision_tree.accuracy(x_test, y_test))


# %%
def get_stats():
    TP = np.sum(np.logical_and(y_test == 1, y_hat == 1))
    FP = np.sum(np.logical_and(y_test == 0, y_hat == 1))
    TN = np.sum(np.logical_and(y_test == 0, y_hat == 0))
    FN = np.sum(np.logical_and(y_test == 1, y_hat == 0))
    return TP, FP, TN, FN


def specificity():
    TP, FP, TN, FN = get_stats()
    return TN / (TN + FP)


def sensitivity():
    TP, FP, TN, FN = get_stats()
    return TP / (TP + FN)
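

# %%
# Usage sketch (not part of the original cell): print the confusion counts together with
# sensitivity and specificity, assuming y_test and y_hat are binary 0/1 arrays as above.
TP, FP, TN, FN = get_stats()
print(f"TP={TP} FP={FP} TN={TN} FN={FN}")
print(f"sensitivity: {sensitivity():.3f}, specificity: {specificity():.3f}")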