def test_DTree_best_split_minority():
    """Check the best first split when using the minority-class metric.

    The tree is asked for the best split on the avocado data (every column
    but the last as features, the last column as the target). The chosen
    feature must be ``'firmness'`` and the reported impurity must be 0.25.
    """
    # Local import keeps the test self-contained regardless of file imports.
    import math

    model = DTree(metric=minority_class)
    X = avocados.iloc[:, :-1]
    y = avocados.iloc[:, -1]
    feature, impurity = model._best_split(X, y)
    assert feature == 'firmness'
    # The impurity is a computed float, so compare with a tolerance rather
    # than exact equality: a harmless rounding difference in how the weighted
    # value is accumulated should not fail the test.
    assert math.isclose(impurity, 0.25)
def test_DTree_best_split_gini():
    """Check the best first split when using the gini metric.

    The chosen feature must be ``'firmness'`` and the reported impurity must
    lie strictly between 0.33 and 0.34.
    """
    features, target = avocados.iloc[:, :-1], avocados.iloc[:, -1]
    tree = DTree(metric=gini)
    best_feature, best_impurity = tree._best_split(features, target)
    assert best_feature == 'firmness'
    assert 0.33 < best_impurity < 0.34
def test_DTree_fit_recusively_child_labels():
    """Walk the fitted tree and verify that every node carries a label."""
    def check_labels(node):
        # Every node, split or not, must have a label.
        assert node._label is not None, "Each node should be labeled."
        if not node._split:
            return
        # Only nodes that split have children to descend into.
        check_labels(node._yes)
        check_labels(node._no)

    features, target = avocados.iloc[:, :-1], avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)
    check_labels(tree)
def test_DTree_fit_children():
    """Verify the root is unsplit before fitting and split with two subtrees after."""
    features, target = avocados.iloc[:, :-1], avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)

    # A freshly constructed tree has no split and no children.
    not_set_yet = "Before fitting, this should not be set yet."
    assert tree._split is False, not_set_yet
    assert tree._yes is None, not_set_yet
    assert tree._no is None, not_set_yet

    tree.fit(features, target)

    # After fitting, the root must have split into two DTree children.
    assert tree._split is not False, "After fitting, the top node should have split"
    assert isinstance(tree._yes, DTree), "The Yes child node should be a subtree"
    assert isinstance(tree._no, DTree), "The No child node should be a subtree"
def test_DTree_fit_recusively_children():
    """Walk the fitted tree: every node that split must have two DTree children."""
    def check_children(node):
        if not node._split:
            # Nodes that did not split have nothing further to check.
            return
        assert isinstance(node._yes, DTree), "The Yes child node should be a subtree"
        assert isinstance(node._no, DTree), "The No child node should be a subtree"
        check_children(node._yes)
        check_children(node._no)

    features, target = avocados.iloc[:, :-1], avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)
    check_children(tree)
def test_DTree_fit_text_string():
    """Verify the fitted tree renders to exactly the expected text layout."""
    features, target = avocados.iloc[:, :-1], avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)
    rendered = tree.to_text()

    # The expected rendering, one entry per line. The empty first and last
    # entries produce the leading and trailing newline of the comparison text.
    expected_lines = (
        "",
        "|---firmness = no",
        "|   |---0 (2)",
        "|---firmness = yes",
        "|   |---nub_loose = no",
        "|   |   |---0 (3)",
        "|   |---nub_loose = yes",
        "|   |   |---1 (3)",
        "",
    )
    expected = "\n".join(expected_lines)

    # A newline is prepended to the actual text so it lines up with the
    # expected text, which starts with one.
    assert '\n' + rendered == expected, "The tree should look like this"
def test_DTree_fit_recusively_decreasing_impurity():
    """Verify each split lowers the sample-weighted impurity below its parent's."""
    def check_node(node):
        """Check the subtree under ``node`` and return its (impurity, samples)."""
        if node._split:
            imp_yes, n_yes = check_node(node._yes)
            imp_no, n_no = check_node(node._no)
            # Weight each side by its sample count so both totals are comparable.
            children_total = (imp_yes * n_yes) + (imp_no * n_no)
            parent_total = node._impurity * node._samples
            assert children_total < parent_total, (
                "The weighted impurity of the children should be smaller than the parent"
            )
        return node._impurity, node._samples

    features, target = avocados.iloc[:, :-1], avocados.iloc[:, -1]
    tree = DTree(metric=minority_class)
    tree.fit(features, target)
    check_node(tree)