Esempio n. 1
0
def test_predict_example_data(test_input):
    """Fit on Toy-Cancer and compare the predicted test labels.

    ``test_input`` is forwarded as ``n_estimators`` (presumably supplied by a
    pytest parametrization that is not visible here).
    """
    train_db, test_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=train_db.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(train_db)

    predicted = model.predict(test_db)
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Esempio n. 2
0
def test_learn_example_dataset_1(test_input):
    """Fitting with ``n_estimators=test_input`` yields that many estimators."""
    train_db, _ = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=train_db.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(train_db)

    n_learned = len(model.estimators_)
    assert n_learned == test_input
Esempio n. 3
0
def test_initializing_example_background_knowledge_3():
    """Check ``str(Background)`` when modes and extra parameters are given.

    Every non-default option passed to the constructor, every mode, and the
    ``okIfUnknown`` / ``bridger`` entries must appear in the rendered text.
    """
    train_db, _ = load_toy_cancer()
    options = {
        "line_search": True,
        "recursion": True,
        "node_size": 3,
        "max_tree_depth": 4,
        "number_of_clauses": 8,
        "number_of_cycles": 10,
        "ok_if_unknown": ["smokes/1", "friends/2"],
        "bridgers": ["friends/2"],
    }
    background = Background(modes=train_db.modes, **options)
    assert background.modes == train_db.modes

    rendered = str(background)
    expected_fragments = (
        "setParam: nodeSize=3.",
        "setParam: maxTreeDepth=4.",
        "setParam: numOfCycles=10.",
        "setParam: numOfClauses=8.",
        "setParam: lineSearch=true.",
        "setParam: recursion=true.",
        "friends(+Person,-Person).",
        "friends(-Person,+Person).",
        "smokes(+Person).",
        "cancer(+Person).",
        "okIfUnknown: smokes/1.",
        "okIfUnknown: friends/2.",
        "bridger: friends/2.",
    )
    # Checked in the same order as the individual assertions they replace.
    for fragment in expected_fragments:
        assert fragment in rendered
Esempio n. 4
0
def test_initialize_bad_neg_pos_ratio(test_input):
    """Fitting with an invalid ``neg_pos_ratio`` must raise ``ValueError``."""
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        neg_pos_ratio=test_input,
    )
    train_db, _ = load_toy_cancer()

    # The error is expected from fit(), not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train_db)
Esempio n. 5
0
def test_initialize_bad_n_estimators(test_input):
    """Fitting with an invalid ``n_estimators`` must raise ``ValueError``."""
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        n_estimators=test_input,
    )
    train_db, _ = load_toy_cancer()

    # The error is expected from fit(), not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train_db)
def test_len_train_pos_and_neg():
    """Check the pos/neg/facts sizes of the Toy-Cancer train and test sets."""
    train_db, test_db = load_toy_cancer()

    train_sizes = (len(train_db.pos), len(train_db.neg), len(train_db.facts))
    assert train_sizes == (4, 2, 15)

    test_sizes = (len(test_db.pos), len(test_db.neg), len(test_db.facts))
    assert test_sizes == (3, 2, 13)
def test_train_database_train_objects_exist():
    """Ensure ``pos``, ``neg`` and ``facts`` are set on both databases."""
    train_db, test_db = load_toy_cancer()
    for database in (train_db, test_db):
        for attribute in ("pos", "neg", "facts"):
            assert getattr(database, attribute) is not None
def test_toy_cancer_predict_after_load(test_input):
    """Restore a model from a stored Toy-Cancer json file and predict.

    ``test_input`` selects which ``toy_cancer_<test_input>.json`` regression
    file is loaded.
    """
    model = BoostedRDN()
    json_path = (
        "srlearn/tests/regression_tests/json/toy_cancer_{0}.json".format(
            test_input)
    )
    model.from_json(json_path)

    _, test_db = load_toy_cancer()
    predicted = model.predict(test_db)
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Esempio n. 9
0
def test_cannot_read_outside_length_of_dotfiles():
    """``export_digraph`` must raise ``IndexError`` for invalid tree indexes."""
    train_db, _ = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=train_db.modes),
    )
    model.fit(train_db)

    for bad_index in (-10, -5, -1, 10):
        with pytest.raises(IndexError):
            export_digraph(model, tree_index=bad_index)
Esempio n. 10
0
def test_predict_proba_test_data():
    """``predict_proba`` on the test set matches expected values to 2 places."""
    train_db, test_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=train_db.modes),
        target="cancer",
        n_estimators=5,
    )
    model.fit(train_db)

    probabilities = model.predict_proba(test_db)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
def test_feature_importances_toy_cancer():
    """The top Toy-Cancer feature importance is ``("smokes", 10)``."""
    train_db, _ = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=train_db.modes),
        n_estimators=10,
    )
    model.fit(train_db)

    importances = model.feature_importances_
    top_entries = importances.most_common(1)
    assert top_entries[0] == ("smokes", 10)
Esempio n. 12
0
def test_serialize_BoostedRDN(tmpdir):
    """A model written with ``to_json`` can be reloaded and used to predict."""
    json_file = tmpdir.join("ToyCancerRDN.json")
    train_db, test_db = load_toy_cancer()

    fitted = BoostedRDN(
        background=Background(modes=train_db.modes),
        target="cancer",
        n_estimators=5,
    )
    fitted.fit(train_db)
    fitted.to_json(json_file)

    # A fresh, unfitted instance gets its state from the json file only.
    restored = BoostedRDN()
    restored.from_json(json_file)

    predicted = restored.predict(test_db)
    assert len(restored.estimators_) == 5
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Esempio n. 13
0
def test_initialize_example_background_knowledge_1():
    """Check ``str(Background)`` when only the example modes are given."""
    train_db, _ = load_toy_cancer()
    background = Background(modes=train_db.modes)

    assert background.modes == train_db.modes
    assert not background.line_search
    assert not background.recursion

    rendered = str(background)
    expected_fragments = (
        "setParam: nodeSize=2.",
        "setParam: maxTreeDepth=3.",
        "setParam: numOfCycles=100.",
        "setParam: numOfClauses=100.",
        "friends(+Person,-Person).",
        "friends(-Person,+Person).",
        "smokes(+Person).",
        "cancer(+Person).",
    )
    # Checked in the same order as the individual assertions they replace.
    for fragment in expected_fragments:
        assert fragment in rendered
Esempio n. 14
0
def test_initialize_bad_target(test_input):
    """Fitting with an invalid ``target`` must raise ``ValueError``."""
    model = BoostedRDN(target=test_input)
    train_db, _ = load_toy_cancer()

    # The error is expected from fit(), not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train_db)
Esempio n. 15
0
def test_write_background_to_file_2(tmpdir):
    """``Background.write`` output equals ``str(Background)`` with extra params."""
    train_db, _ = load_toy_cancer()
    background = Background(
        modes=train_db.modes,
        node_size=1,
        max_tree_depth=5,
    )

    target_dir = pathlib.Path(tmpdir)
    background.write(filename="train", location=target_dir)

    written_text = tmpdir.join("train_bk.txt").read()
    assert written_text == str(background)
def test_train_test_exists():
    """Both databases returned by ``load_toy_cancer`` must be non-``None``."""
    train_db, test_db = load_toy_cancer()
    for database in (train_db, test_db):
        assert database is not None
Esempio n. 17
0
def test_initialize_bad_background(test_input):
    """Fitting with an invalid ``background`` must raise ``ValueError``."""
    model = BoostedRDN(target="cancer", background=test_input)
    train_db, _ = load_toy_cancer()

    # The error is expected from fit(), not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train_db)
The smokes-friends-cancer example is a common first example in probabilistic relational models. Here,
we use this set to learn a Relational Dependency Network (:class:`srlearn.rdn.BoostedRDN`).

This shows how the margin between positive and negative examples is maximized as the number of
iterations of boosting increases.
"""

from srlearn.rdn import BoostedRDN
from srlearn import Background
from srlearn.datasets import load_toy_cancer

import numpy as np
import matplotlib.pyplot as plt

# Load the Toy-Cancer training and test databases.
train, test = load_toy_cancer()

# Background knowledge is built from the modes of the training database.
bk = Background(modes=train.modes)

# Boosted RDN for the "cancer" target, configured with 20 estimators.
clf = BoostedRDN(
    background=bk,
    target="cancer",
    max_tree_depth=2,
    node_size=2,
    n_estimators=20,
)

clf.fit(train)

# Integers 1..20; presumably the number of boosting iterations for the
# x-axis (same count as n_estimators=20) -- confirm against the plotting code.
x = np.arange(1, 21)
# NOTE(review): the code that fills this list lies outside the visible chunk.
y_pos = []