Example #1
0
def test_learn_example_dataset_1(test_input):
    """Fit on the Toy-Cancer training split and count the learned estimators.

    ``test_input`` is passed as ``n_estimators`` (presumably supplied by a
    pytest parametrization that is not visible in this excerpt). After
    fitting, ``estimators_`` must contain exactly that many entries.
    """
    training_db, _unused_test_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(training_db)
    assert len(model.estimators_) == test_input
Example #2
0
def test_initialize_bad_n_estimators(test_input):
    """Fitting with an invalid ``n_estimators`` must raise ``ValueError``.

    The constructor call sits outside the ``pytest.raises`` block, so the
    error is expected from ``fit`` rather than from ``__init__``.
    """
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        n_estimators=test_input,
    )
    with pytest.raises(ValueError):
        model.fit(example_data.train)
Example #3
0
def test_learn_example_dataset_1(test_input):
    """Fit on the bundled example data and count the learned estimators.

    ``test_input`` is used as ``n_estimators``; the fitted model must expose
    exactly that many entries in ``estimators_``.
    """
    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(
        background=background,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(example_data.train)
    assert len(model.estimators_) == test_input
Example #4
0
def test_predict_example_data(test_input):
    """Fit on the Toy-Cancer training split, then check ``predict``.

    ``test_input`` is used as ``n_estimators``. Predictions on the test
    split must equal ``[1, 1, 1, 0, 0]`` exactly.
    """
    training_db, testing_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(training_db)
    predictions = model.predict(testing_db)
    assert_array_equal(predictions, np.array([1.0, 1.0, 1.0, 0.0, 0.0]))
Example #5
0
def test_predict_example_data(test_input):
    """Fit on the bundled example data, then check ``predict``.

    ``test_input`` is used as ``n_estimators``. Predictions on
    ``example_data.test`` must equal ``[1, 1, 1, 0, 0]`` exactly.
    """
    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(
        background=background,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(example_data.train)
    predictions = model.predict(example_data.test)
    assert_array_equal(predictions, np.array([1.0, 1.0, 1.0, 0.0, 0.0]))
Example #6
0
def test_initialize_bad_neg_pos_ratio(test_input):
    """Fitting with an invalid ``neg_pos_ratio`` must raise ``ValueError``.

    Only the ``fit`` call is wrapped in ``pytest.raises``; building the
    model and loading the data are expected to succeed.
    """
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        neg_pos_ratio=test_input,
    )
    training_db, _unused_test_db = load_toy_cancer()
    with pytest.raises(ValueError):
        model.fit(training_db)
def test_toy_cancer_predict_after_load(test_input):
    """Load a stored Toy-Cancer model from JSON and predict with it.

    ``test_input`` selects which ``toy_cancer_<test_input>.json`` regression
    file is loaded. The path is relative, so it resolves against the current
    working directory.
    """
    model = BoostedRDN()
    json_path = (
        "srlearn/tests/regression_tests/json/toy_cancer_{0}.json".format(
            test_input)
    )
    model.from_json(json_path)

    _unused_train_db, testing_db = load_toy_cancer()
    predictions = model.predict(testing_db)
    assert_array_equal(predictions, np.array([1.0, 1.0, 1.0, 0.0, 0.0]))
Example #8
0
def test_cannot_read_outside_length_of_dotfiles():
    """Out-of-range tree indexes must make ``export_digraph`` raise.

    The model is fit with its default number of estimators, and each of the
    indexes ``-10``, ``-5``, ``-1`` and ``10`` is expected to raise
    ``IndexError``.
    """
    training_db, _unused_test_db = load_toy_cancer()
    background = Background(modes=training_db.modes)
    model = BoostedRDN(target="cancer", background=background)
    model.fit(training_db)

    for bad_index in (-10, -5, -1, 10):
        with pytest.raises(IndexError):
            export_digraph(model, tree_index=bad_index)
Example #9
0
def test_predict_proba_test_data():
    """Check ``predict_proba`` on Toy-Cancer to two decimal places.

    A five-estimator model is fit on the training split; probabilities on
    the test split are compared against the expected values with
    ``decimal=2``.
    """
    training_db, testing_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=5,
    )
    model.fit(training_db)

    probabilities = model.predict_proba(testing_db)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
Example #10
0
def test_predict_proba_test_data():
    """Check ``predict_proba`` on the bundled example data to two decimals.

    A five-estimator model is fit on ``example_data.train``; probabilities
    for ``example_data.test`` are compared against the expected values with
    ``decimal=2``.
    """
    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(background=background, target="cancer", n_estimators=5)
    model.fit(example_data.train)

    probabilities = model.predict_proba(example_data.test)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
def test_feature_importances_toy_cancer():
    """Check the top feature importance learned from Toy-Cancer.

    With ten estimators, the most common entry of ``feature_importances_``
    must be ``("smokes", 10)``.
    """
    training_db, _unused_test_db = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=training_db.modes),
        n_estimators=10,
    )
    model.fit(training_db)

    top_feature = model.feature_importances_.most_common(1)[0]
    assert top_feature == ("smokes", 10)
        neg="datasets/imdb/train1/train1_neg.txt",
        facts="datasets/imdb/train1/train1_facts.txt",
    )

    bk = Background(modes=[
        "actor(+person).", "movie(+movie, +person).",
        "movie(+movie, -person).", "movie(-movie, +person).",
        "female_gender(+person).", "genre(+person, +genre).",
        "genre(+person, #genre).", "genre(+person, -genre).",
        "genre(-person, +genre).", "workedunder(+person, +person).",
        "workedunder(+person, -person).", "workedunder(-person, +person)."
    ], )

    clf = BoostedRDN(
        background=bk,
        target="workedunder",
        node_size=3,
        max_tree_depth=3,
    )

    _start = time.perf_counter()

    clf.fit(db)

    _end = time.perf_counter()

    _difference = _end - _start
    print(_difference)
    _train_times.append(_difference)

# Summarize the collected `clf.fit()` timings. The header line announces both
# statistics, so print the mean followed by the standard deviation.
print("Mean runtime for `clf.fit()` and standard deviation:")
print(np.mean(_train_times))
print(np.std(_train_times))
from srlearn.rdn import BoostedRDN
from srlearn import Background

# Background knowledge: one mode declaration per predicate, together with
# the `node_size` and `number_of_clauses` settings.
bk = Background(
    modes=[
        "male(+name).", "father(+name,+name).", "childof(+name,+name).",
        "siblingof(+name,+name)."
    ],
    node_size=1,
    number_of_clauses=8,
)

# Boosted RDN that learns the "father" target with five estimators.
clf = BoostedRDN(
    background=bk,
    target="father",
    n_estimators=5,
)

# NOTE(review): `train` is not defined in this excerpt; presumably it is the
# training database built earlier in the script.
clf.fit(train)

# %%
# It's important to check whether we actually learn something useful.
# We'll visually inspect the relational regression trees to see what
# they learned.

from srlearn.plotting import plot_digraph
from srlearn.plotting import export_digraph

# Export the tree at index 0 of the fitted model and plot it as HTML.
plot_digraph(export_digraph(clf, 0), format="html")
def test_feature_importances_before_fit():
    """Reading ``feature_importances_`` on an unfitted model must raise.

    The attribute access itself is expected to raise ``ValueError``.
    """
    unfitted = BoostedRDN()
    with pytest.raises(ValueError):
        # Bind the result so the access is not a bare expression statement.
        _ = unfitted.feature_importances_
Example #15
0
def test_bad_shell_command():
    """A shell command that cannot exit 0 must raise ``RuntimeError``.

    ``git bat`` is used as the failing command.
    """
    model = BoostedRDN()
    failing_command = "git bat"
    with pytest.raises(RuntimeError):
        model._call_shell_command(failing_command)
Example #16
0
def test_initialize_bad_target(test_input):
    """Fitting with an invalid ``target`` must raise ``ValueError``.

    The model is built with ``target=test_input`` and fit on the Toy-Cancer
    training split; only the ``fit`` call is expected to raise.
    """
    model = BoostedRDN(target=test_input)
    training_db, _unused_test_db = load_toy_cancer()
    with pytest.raises(ValueError):
        model.fit(training_db)
Example #17
0
def test_initialize_bad_target(test_input):
    """Fitting with an invalid ``target`` must raise ``ValueError``.

    The model is built with ``target=test_input`` and fit on
    ``example_data.train``; only the ``fit`` call is expected to raise.
    """
    model = BoostedRDN(target=test_input)
    with pytest.raises(ValueError):
        model.fit(example_data.train)
Example #18
0
def test_initialize_rdn_trees(test_input):
    """The constructor must store ``n_estimators`` unchanged.

    ``test_input`` is the tree count handed to ``BoostedRDN``.
    """
    model = BoostedRDN(n_estimators=test_input)
    assert model.n_estimators == test_input
Example #19
0
def test_initialize_rdn_1():
    """Check the defaults of a ``BoostedRDN`` built with no arguments."""
    model = BoostedRDN()
    # The expected default target is the string "None", not the ``None``
    # singleton.
    assert model.target == "None"
    assert model.n_estimators == 10
        "courseprof(+course,-person).",
        "courseta(+course,-person).",
        "courseta(-course,+person).",
        "project(-proj,+person).",
        "project(+proj,-person).",
        "sameperson(-person,+person).",
        "faculty(+person).",
        "student(+person).",
    ],
    number_of_clauses=8,
)

# Boosted RDN for the "faculty" target: ten estimators, with
# `max_tree_depth=3` and `node_size=3`.
clf = BoostedRDN(
    background=bkg,
    target="faculty",
    max_tree_depth=3,
    node_size=3,
    n_estimators=10,
)

# NOTE(review): `train` is defined outside this excerpt; presumably it is the
# training database for this example.
clf.fit(train)

# %%
# The built-in ``feature_importances_`` attribute of a fitted classifier is a
# Counter of how many times each feature appears across the trees:

clf.feature_importances_

# %%
# These should generally be looked at while looking at the trees, so we'll
# plot the first tree here as well.
    )

    bk = Background(modes=[
        "courseprof(-Course, +Person).",
        "courseprof(+Course, -Person).",
        "courseta(+Course, -Person).",
        "courseta(-Course, +Person).",
        "faculty(+Person).",
        "project(-Proj, +Person).",
        "project(+Proj, -Person).",
        "sameperson(-Person, +Person).",
    ], )

    clf = BoostedRDN(
        background=bk,
        target="faculty",
        node_size=2,
        max_tree_depth=3,
    )

    _start = time.perf_counter()

    clf.fit(db)

    _end = time.perf_counter()

    _difference = _end - _start
    print(_difference)
    _train_times.append(_difference)

# Summarize the collected `clf.fit()` timings. The header line announces both
# statistics, so print the mean followed by the standard deviation.
print("Mean runtime for `clf.fit()` and standard deviation:")
print(np.mean(_train_times))
print(np.std(_train_times))
Example #22
0
def test_serialize_BoostedRDN(tmpdir):
    """Round-trip a fitted model through JSON and predict with the copy.

    A five-estimator model is fit on Toy-Cancer and written to a file under
    ``tmpdir``. A second, fresh instance loads that file and must report
    five estimators and the expected predictions on the test split.
    """
    json_file = tmpdir.join("ToyCancerRDN.json")
    training_db, testing_db = load_toy_cancer()

    fitted = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=5,
    )
    fitted.fit(training_db)
    fitted.to_json(json_file)

    # Load into a brand-new instance so nothing is shared with `fitted`.
    restored = BoostedRDN()
    restored.from_json(json_file)

    predictions = restored.predict(testing_db)
    assert len(restored.estimators_) == 5
    assert_array_equal(predictions, np.array([1.0, 1.0, 1.0, 0.0, 0.0]))
Example #23
0
def test_initialize_bad_background(test_input):
    """Fitting with an invalid ``background`` must raise ``ValueError``.

    The model is built with ``background=test_input`` and fit on the
    Toy-Cancer training split; only the ``fit`` call is expected to raise.
    """
    model = BoostedRDN(target="cancer", background=test_input)
    training_db, _unused_test_db = load_toy_cancer()
    with pytest.raises(ValueError):
        model.fit(training_db)
from srlearn.rdn import BoostedRDN
from srlearn import Background
from srlearn import example_data

import numpy as np
import matplotlib.pyplot as plt

# Background knowledge built from the modes bundled with the example
# training data, with `use_std_logic_variables` switched on.
bk = Background(
    modes=example_data.train.modes,
    use_std_logic_variables=True,
)

# Fit once with 20 estimators; the loop below re-uses this fitted model
# while setting `n_estimators` to each value in `x`.
clf = BoostedRDN(
    background=bk,
    target='cancer',
    max_tree_depth=2,
    node_size=2,
    n_estimators=20,
)

clf.fit(example_data.train)

# Ensemble sizes to evaluate: 1 through 20 inclusive.
x = np.arange(1, 21)
# Accumulators for the loop below; how they are filled is outside this
# excerpt (presumably one entry per ensemble size, for plotting).
y_pos = []
y_neg = []
thresholds = []

for n_trees in x:
    clf.set_params(n_estimators=n_trees)
    probs = clf.predict_proba(example_data.test)
Example #25
0
]

# Background knowledge: mode declarations for the four predicates, with
# `number_of_clauses=8` and `use_prolog_variables` switched on.
bk = Background(
    modes=[
        "male(+name).",
        "father(+name,+name).",
        "childof(+name,+name).",
        "siblingof(+name,+name)."
    ],
    number_of_clauses=8,
    use_prolog_variables=True,
)

# Boosted RDN that learns the "father" target with five estimators.
clf = BoostedRDN(
    background=bk,
    target="father",
    n_estimators=5,
)

# NOTE(review): `train_db` is defined above this excerpt.
clf.fit(train_db)

# Separate database for test examples (presumably used for prediction later
# in the script).
test_db = Database()

# Positive "father" examples for the test database.
test_db.pos = [
    "father(elizabeth,mrbennet).",
    "father(jane,mrbennet).",
    "father(charlotte,mrlucas).",
]

test_db.neg = [
    "father(charlotte,mrsbennet).",
Example #26
0
def test_initialize_bad_background(test_input):
    """Fitting with an invalid ``background`` must raise ``ValueError``.

    The model is built with ``background=test_input`` and fit on
    ``example_data.train``; only the ``fit`` call is expected to raise.
    """
    model = BoostedRDN(target="cancer", background=test_input)
    with pytest.raises(ValueError):
        model.fit(example_data.train)