Esempio n. 1
0
def test_learn_example_dataset_1(test_input):
    """Fit on Toy-Cancer and check how many estimators were learned.

    ``test_input`` is passed as ``n_estimators``; after fitting, the
    length of ``estimators_`` must equal it. The value is supplied from
    outside this function (presumably via pytest parametrization).
    """
    training_db, _ = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(training_db)
    n_learned = len(model.estimators_)
    assert n_learned == test_input
Esempio n. 2
0
def test_initialize_bad_n_estimators(test_input):
    """Check that fitting with an invalid ``n_estimators`` raises ValueError.

    The model is constructed outside the ``pytest.raises`` context, so
    the error is expected from ``fit`` rather than from the constructor.
    """
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        n_estimators=test_input,
    )
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.fit(example_data.train)
Esempio n. 3
0
def test_learn_example_dataset_1(test_input):
    """Fit on the bundled example data and count the learned estimators.

    ``test_input`` is used as ``n_estimators``; after fitting, the length
    of ``estimators_`` must equal it.
    """
    training_db = example_data.train
    knowledge = Background(
        modes=training_db.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(
        background=knowledge,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(training_db)
    n_learned = len(model.estimators_)
    assert n_learned == test_input
Esempio n. 4
0
def test_predict_example_data(test_input):
    """Fit on the Toy-Cancer training split and check test predictions.

    ``test_input`` is used as ``n_estimators``. The predictions on the
    test split must equal ``[1, 1, 1, 0, 0]`` exactly.
    """
    training_db, testing_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(training_db)

    predicted = model.predict(testing_db)
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Esempio n. 5
0
def test_predict_example_data(test_input):
    """Fit on the bundled example data and check test predictions.

    ``test_input`` is used as ``n_estimators``. The predictions on
    ``example_data.test`` must equal ``[1, 1, 1, 0, 0]`` exactly.
    """
    training_db = example_data.train
    knowledge = Background(
        modes=training_db.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(
        background=knowledge,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(training_db)

    predicted = model.predict(example_data.test)
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Esempio n. 6
0
def test_initialize_bad_neg_pos_ratio(test_input):
    """Check that fitting with an invalid ``neg_pos_ratio`` raises ValueError.

    Only the ``fit`` call sits inside the ``pytest.raises`` context, so
    the error is expected from fitting, not from construction.
    """
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        neg_pos_ratio=test_input,
    )
    training_db, _ = load_toy_cancer()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.fit(training_db)
Esempio n. 7
0
def test_cannot_read_outside_length_of_dotfiles():
    """Check that exporting a tree with an invalid index raises IndexError.

    A model is fitted on Toy-Cancer without an explicit ``n_estimators``;
    each of the indexes -10, -5, -1 and 10 must then be rejected by
    ``export_digraph``.
    """
    training_db, _ = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=training_db.modes),
    )
    model.fit(training_db)

    invalid_indexes = (-10, -5, -1, 10)
    for bad_index in invalid_indexes:
        with pytest.raises(IndexError):
            export_digraph(model, tree_index=bad_index)
Esempio n. 8
0
def test_predict_proba_test_data():
    """Compare ``predict_proba`` on the Toy-Cancer test split to known values.

    A five-estimator model is fitted on the training split; its
    probabilities on the test split must match
    ``[0.74, 0.74, 0.74, 0.25, 0.25]`` to two decimal places.
    """
    training_db, testing_db = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=5,
    )
    model.fit(training_db)

    probabilities = model.predict_proba(testing_db)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
Esempio n. 9
0
def test_predict_proba_test_data():
    """Compare ``predict_proba`` on the bundled example data to known values.

    A five-estimator model is fitted on ``example_data.train``; its
    probabilities on ``example_data.test`` must match
    ``[0.74, 0.74, 0.74, 0.25, 0.25]`` to two decimal places.
    """
    training_db = example_data.train
    knowledge = Background(
        modes=training_db.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(background=knowledge, target="cancer", n_estimators=5)
    model.fit(training_db)

    probabilities = model.predict_proba(example_data.test)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
def test_feature_importances_toy_cancer():
    """Check the top feature importance learned from Toy-Cancer.

    After fitting ten estimators, the single most common entry of
    ``feature_importances_`` must be ``("smokes", 10)``.
    """
    training_db, _ = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=training_db.modes),
        n_estimators=10,
    )
    model.fit(training_db)

    top_feature = model.feature_importances_.most_common(1)[0]
    assert top_feature == ("smokes", 10)
Esempio n. 11
0
def test_serialize_BoostedRDN(tmpdir):
    """Check that a model reloaded from JSON can still predict.

    A five-estimator model is fitted on Toy-Cancer and written to a JSON
    file under ``tmpdir``. A fresh ``BoostedRDN`` then loads that file
    and must report five estimators and reproduce the expected test
    predictions.
    """
    json_path = tmpdir.join("ToyCancerRDN.json")
    training_db, testing_db = load_toy_cancer()

    original = BoostedRDN(
        background=Background(modes=training_db.modes),
        target="cancer",
        n_estimators=5,
    )
    original.fit(training_db)
    original.to_json(json_path)

    # Restore into a default-constructed instance; nothing is refitted.
    restored = BoostedRDN()
    restored.from_json(json_path)

    predicted = restored.predict(testing_db)
    assert len(restored.estimators_) == 5
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predicted, expected)
Esempio n. 12
0
        "male(+name).",
        "father(+name,+name).",
        "childof(+name,+name).",
        "siblingof(+name,+name)."
    ],
    number_of_clauses=8,
    use_prolog_variables=True,
)

# Configure a boosted model for the "father" target with five estimators.
# `bk` is the background object defined earlier in this script.
clf = BoostedRDN(
    background=bk,
    target="father",
    n_estimators=5,
)

# Fit on `train_db`, which is defined earlier in this script.
clf.fit(train_db)

# Build a separate database to hold the test examples.
test_db = Database()

# Facts assigned as the positive examples of the test database.
test_db.pos = [
    "father(elizabeth,mrbennet).",
    "father(jane,mrbennet).",
    "father(charlotte,mrlucas).",
]

# Facts assigned as the negative examples of the test database.
test_db.neg = [
    "father(charlotte,mrsbennet).",
    "father(jane,mrlucas).",
    "father(mrsbennet,mrbennet).",
    "father(jane,elizabeth).",
]
import matplotlib.pyplot as plt

# Background built from the modes of the bundled example training data.
bk = Background(
    modes=example_data.train.modes,
    use_std_logic_variables=True,
)

# Model for the "cancer" target, configured with 20 estimators.
clf = BoostedRDN(
    background=bk,
    target='cancer',
    max_tree_depth=2,
    node_size=2,
    n_estimators=20,
)

# The model is fitted once here; the loop below never calls fit() again.
clf.fit(example_data.train)

# Tree counts to inspect: 1 through 20 inclusive.
x = np.arange(1, 21)
y_pos = []
y_neg = []
thresholds = []

for n_trees in x:
    # Change n_estimators on the already-fitted model, then predict.
    # NOTE(review): presumably this limits prediction to the first
    # `n_trees` trees without refitting; confirm against the library.
    clf.set_params(n_estimators=n_trees)
    probs = clf.predict_proba(example_data.test)

    # Record the threshold and the mean probability for entries where
    # clf.classes_ is nonzero versus where it equals 0.
    # NOTE(review): assumes clf.classes_ aligns element-wise with
    # `probs` after predict_proba; verify.
    thresholds.append(clf.threshold_)
    y_pos.append(np.mean(probs[np.nonzero(clf.classes_)]))
    y_neg.append(np.mean(probs[clf.classes_ == 0]))

# Convert the collected thresholds from a list to a NumPy array.
thresholds = np.array(thresholds)
        "actor(+person).", "movie(+movie, +person).",
        "movie(+movie, -person).", "movie(-movie, +person).",
        "female_gender(+person).", "genre(+person, +genre).",
        "genre(+person, #genre).", "genre(+person, -genre).",
        "genre(-person, +genre).", "workedunder(+person, +person).",
        "workedunder(+person, -person).", "workedunder(-person, +person)."
    ], )

    # Model for the "workedunder" target; `bk` is the background object
    # built just above in this same indented body.
    clf = BoostedRDN(
        background=bk,
        target="workedunder",
        node_size=3,
        max_tree_depth=3,
    )

    # Time only the fit() call using the perf_counter clock.
    _start = time.perf_counter()

    clf.fit(db)

    _end = time.perf_counter()

    # Elapsed time for this fit; printed and appended to the running list.
    _difference = _end - _start
    print(_difference)
    _train_times.append(_difference)

# Summarize the collected fit() timings: mean, then standard deviation.
print("Mean runtime for `clf.fit()` and standard deviation:")
print(np.mean(_train_times))
print(np.std(_train_times))

# Also print the individual measurements.
print("Raw numbers", _train_times)
Esempio n. 15
0
def test_initialize_bad_background(test_input):
    """Check that fitting with an invalid ``background`` raises ValueError.

    ``test_input`` is passed as ``background``. Construction happens
    outside the ``pytest.raises`` context, so the error is expected from
    ``fit``.
    """
    model = BoostedRDN(
        target="cancer",
        background=test_input,
    )
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.fit(example_data.train)
Esempio n. 16
0
def test_initialize_bad_target(test_input):
    """Check that fitting with an invalid ``target`` raises ValueError.

    ``test_input`` is passed as ``target``. Construction happens outside
    the ``pytest.raises`` context, so the error is expected from ``fit``.
    """
    model = BoostedRDN(target=test_input)
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.fit(example_data.train)
# Background with four mode declarations over the `name` type.
# NOTE(review): node_size is passed to Background here; confirm this
# matches the installed srlearn version's API.
bk = Background(
    modes=[
        "male(+name).", "father(+name,+name).", "childof(+name,+name).",
        "siblingof(+name,+name)."
    ],
    node_size=1,
    number_of_clauses=8,
)

# Model for the "father" target with five estimators.
clf = BoostedRDN(
    background=bk,
    target="father",
    n_estimators=5,
)

# Fit on `train`, which is defined earlier in this script.
clf.fit(train)

# %%
# It's important to check whether we actually learn something useful.
# We'll visually inspect the relational regression trees to see what
# they learned.

from srlearn.plotting import plot_digraph
from srlearn.plotting import export_digraph

# Export the tree at index 0 of the fitted model and plot it as HTML.
plot_digraph(export_digraph(clf, 0), format="html")

# %%
# There is some variance between runs, but the concept that the
# trees pick up on is roughly that "*A father has a child and is male.*"
Esempio n. 18
0
def test_initialize_bad_target(test_input):
    """Check that fitting with an invalid ``target`` raises ValueError.

    ``test_input`` is passed as ``target``. Only ``fit`` on the
    Toy-Cancer training split sits inside the ``pytest.raises`` context.
    """
    model = BoostedRDN(target=test_input)
    training_db, _ = load_toy_cancer()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.fit(training_db)
Esempio n. 19
0
def test_initialize_bad_background(test_input):
    """Check that fitting with an invalid ``background`` raises ValueError.

    ``test_input`` is passed as ``background``. Only ``fit`` on the
    Toy-Cancer training split sits inside the ``pytest.raises`` context.
    """
    model = BoostedRDN(
        target="cancer",
        background=test_input,
    )
    training_db, _ = load_toy_cancer()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.fit(training_db)