def test_DTree_best_split_minority():
    """Check the root split chosen when using the minority-class metric.

    All columns of ``avocados`` except the last are passed as features and
    the last column as the target (``avocados`` is presumably a pandas
    DataFrame, given the ``.iloc`` indexing). ``_best_split`` is expected to
    return ``'firmness'`` together with an impurity of 0.25.
    """
    import math  # function-scope import: only this test needs it

    # Using minority class
    model = DTree(metric=minority_class)
    X = avocados.iloc[:, :-1]
    y = avocados.iloc[:, -1]

    feature, impurity = model._best_split(X, y)

    assert feature == 'firmness'
    # The impurity is a computed number; compare with a tolerance so that a
    # last-bit rounding difference cannot fail the test.
    assert math.isclose(impurity, 0.25)
def test_DTree_best_split_gini():
    """Check the root split chosen when using the gini metric.

    The best split is expected to be on ``'firmness'`` with an impurity
    strictly between 0.33 and 0.34.
    """
    # Using gini
    tree = DTree(metric=gini)
    features = avocados.iloc[:, :-1]  # every column but the last
    target = avocados.iloc[:, -1]     # the last column

    best = tree._best_split(features, target)
    chosen_feature, split_impurity = best

    assert chosen_feature == 'firmness'
    assert 0.33 < split_impurity < 0.34
def test_DTree_fit_recusively_child_labels():
    """Every node of the fitted tree, the root included, must carry a label."""
    features = avocados.iloc[:, :-1]
    target = avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)

    # Walk the tree with an explicit stack in pre-order: a node first, then
    # its "yes" subtree, then its "no" subtree.
    pending = [tree]
    while pending:
        node = pending.pop()
        assert node._label is not None, "Each node should be labeled."
        if node._split:
            # Push "no" first so that "yes" is popped, and visited, first.
            pending.append(node._no)
            pending.append(node._yes)
def test_DTree_fit_children():
    """The root must be unsplit before ``fit`` and split with two subtrees after."""
    tree = DTree(metric=minority_class)

    # A freshly constructed tree has no split and no children.
    unset_message = "Before fitting, this should not be set yet."
    assert tree._split is False, unset_message
    assert tree._yes is None, unset_message
    assert tree._no is None, unset_message

    features = avocados.iloc[:, :-1]
    target = avocados.iloc[:, -1]
    tree.fit(features, target)

    assert tree._split is not False, "After fitting, the top node should have split"
    expectations = (
        (tree._yes, "The Yes child node should be a subtree"),
        (tree._no, "The No child node should be a subtree"),
    )
    for child, message in expectations:
        assert isinstance(child, DTree), message
def test_DTree_fit_recusively_children():
    """Each node is either unsplit, or split with two ``DTree`` children."""

    def check_subtree(node):
        if not node._split:
            return  # unsplit node: nothing further to verify
        yes_branch = node._yes
        no_branch = node._no
        assert isinstance(yes_branch, DTree), "The Yes child node should be a subtree"
        assert isinstance(no_branch, DTree), "The No child node should be a subtree"
        check_subtree(yes_branch)
        check_subtree(no_branch)

    features = avocados.iloc[:, :-1]
    target = avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)

    check_subtree(tree)
def test_DTree_fit_text_string():
    """Check that fitting on ``avocados`` learns the RIGHT model.

    The tree is fitted with the minority-class metric and rendered with
    ``to_text()``. The rendering is expected to split on ``firmness`` at the
    root and on ``nub_loose`` beneath ``firmness = yes``, with the leaf
    entries listed in ``expected`` below.
    """
    X = avocados.iloc[:, :-1]
    y = avocados.iloc[:, -1]
    model = DTree(metric=minority_class)
    model.fit(X, y)

    text = model.to_text()

    expected = """
|---firmness = no
|   |---0 (2)
|---firmness = yes
|   |---nub_loose = no
|   |   |---0 (3)
|   |---nub_loose = yes
|   |   |---1 (3)
"""

    # Compare the text that you actually got to what it should be.
    # The comparison is done on whitespace-separated tokens: the run of `|`
    # markers in front of each entry still pins its depth, and feature names,
    # values, labels and counts must all match, but the check no longer
    # depends on exact column padding or on leading/trailing newlines.
    assert text.split() == expected.split(), "The tree should look like this"
def test_DTree_fit_recusively_decreasing_impurity():
    """Splitting must always strictly reduce the sample-weighted impurity.

    For every split node, the sum of ``impurity * samples`` over its two
    children has to be strictly smaller than the node's own
    ``impurity * samples``.
    """

    def impurity_and_samples(node):
        """Verify the subtree under ``node``; return its (impurity, samples)."""
        if not node._split:
            return node._impurity, node._samples

        yes_imp, yes_count = impurity_and_samples(node._yes)
        no_imp, no_count = impurity_and_samples(node._no)

        children_total = (yes_imp * yes_count) + (no_imp * no_count)
        parent_total = node._impurity * node._samples
        assert children_total < parent_total, (
            "The weighted impurity of the children should be smaller than the parent"
        )
        return node._impurity, node._samples

    features = avocados.iloc[:, :-1]
    target = avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)

    impurity_and_samples(tree)