Exemple #1
0
def test_learn_example_dataset_1(test_input):
    """Fit an RDN on the example training data and check the ensemble size.

    ``test_input`` is the requested ``n_estimators``; after fitting, the
    model must expose exactly that many entries in ``estimators_``.
    """
    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = RDN(
        background=background,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(example_data.train)

    n_learned = len(model.estimators_)
    assert n_learned == test_input
Exemple #2
0
def test_initialize_bad_n_estimators(test_input):
    """Fitting must raise ``ValueError`` for an invalid ``n_estimators``.

    The model is constructed outside the ``raises`` context, so only the
    call to ``fit`` is expected to raise.
    """
    params = {
        "target": "cancer",
        "background": Background(),
        "n_estimators": test_input,
    }
    model = RDN(**params)
    with pytest.raises(ValueError):
        model.fit(example_data.train)
Exemple #3
0
def test_predict_example_data(test_input):
    """Fit on the training split and compare ``predict`` on the test split.

    ``test_input`` is the ``n_estimators`` value used to build the model;
    the predicted labels must equal ``[1, 1, 1, 0, 0]`` exactly.
    """
    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = RDN(
        background=background,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(example_data.train)

    predicted = model.predict(example_data.test)
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Exemple #4
0
def test_predict_proba_test_data():
    """Check ``predict_proba`` on the test split to two decimal places.

    A five-estimator RDN is fit on the example training data; its
    probabilities for the test split are compared against fixed values.
    """
    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = RDN(background=background, target="cancer", n_estimators=5)
    model.fit(example_data.train)

    probabilities = model.predict_proba(example_data.test)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
Exemple #5
0
def test_initialize_bad_background(test_input):
    """Fitting must raise ``ValueError`` when ``background`` is invalid.

    ``test_input`` is passed straight through as the ``background``
    argument; the error is expected from ``fit``, not from construction.
    """
    model = RDN(background=test_input, target="cancer")
    with pytest.raises(ValueError):
        model.fit(example_data.train)
Exemple #6
0
def test_initialize_bad_target(test_input):
    """Fitting must raise ``ValueError`` when ``target`` is invalid.

    The RDN is built with only ``target=test_input``; the error is
    expected from ``fit``, not from construction.
    """
    model = RDN(target=test_input)
    with pytest.raises(ValueError):
        model.fit(example_data.train)
import matplotlib.pyplot as plt

# Background knowledge built from the modes shipped with the example
# training data.
bk = Background(
    modes=example_data.train.modes,
    use_std_logic_variables=True,
)

# Classifier for the 'cancer' target, configured with n_estimators=20.
clf = RDN(
    background=bk,
    target='cancer',
    max_tree_depth=2,
    node_size=2,
    n_estimators=20,
)

# The model is fit once; the loop below only changes n_estimators and
# re-runs prediction, it never calls fit() again.
clf.fit(example_data.train)

# x: the n_estimators values 1..20 tried in the loop.
# y_pos / y_neg: one mean probability per value of x (see loop body).
# thresholds: the clf.threshold_ value recorded after each prediction.
x = np.arange(1, 21)
y_pos = []
y_neg = []
thresholds = []

for n_trees in x:
    # NOTE(review): presumably predict_proba then uses only the first
    # n_trees of the already-learned trees -- confirm against RDN.
    clf.set_params(n_estimators=n_trees)
    probs = clf.predict_proba(example_data.test)

    # threshold_ is read after predict_proba, so statement order matters.
    thresholds.append(clf.threshold_)
    # Mean of probs where classes_ is nonzero, and where classes_ == 0.
    # NOTE(review): assumes classes_ holds 0/1 labels aligned with probs
    # -- confirm.
    y_pos.append(np.mean(probs[np.nonzero(clf.classes_)]))
    y_neg.append(np.mean(probs[clf.classes_ == 0]))

# Convert the collected list to a NumPy array.
thresholds = np.array(thresholds)
Exemple #8
0
# Background knowledge declaring four modes (male/1, father/2, childof/2,
# siblingof/2), with number_of_clauses=8 and use_prolog_variables=True.
bk = Background(
    modes=[
        "male(+name).", "father(+name,+name).", "childof(+name,+name).",
        "siblingof(+name,+name)."
    ],
    number_of_clauses=8,
    use_prolog_variables=True,
)

# Classifier for the "father" target with n_estimators=5.
clf = RDN(
    background=bk,
    target="father",
    n_estimators=5,
)

# NOTE(review): train_db is not defined in this excerpt; it must be
# created before this line for the script to run.
clf.fit(train_db)

# Test database; pos and neg are filled in below.
test_db = Database()

# Positive "father" examples for the test database.
test_db.pos = [
    "father(elizabeth,mrbennet).",
    "father(jane,mrbennet).",
    "father(charlotte,mrlucas).",
]

# Negative "father" examples for the test database.
test_db.neg = [
    "father(charlotte,mrsbennet).",
    "father(jane,mrlucas).",
    "father(mrsbennet,mrbennet).",
    "father(jane,elizabeth).",
]