def test_predict_example_data(test_input):
    """Fit a BoostedRDN on Toy-Cancer and check its hard predictions.

    ``test_input`` is passed as ``n_estimators``; presumably it is supplied
    by a pytest parametrization that is not visible in this block.
    """
    train, test = load_toy_cancer()
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])

    model = BoostedRDN(
        background=Background(modes=train.modes),
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(train)

    assert_array_equal(model.predict(test), expected)
def test_learn_example_dataset_1(test_input):
    """Check that fitting produces exactly ``test_input`` estimators.

    ``test_input`` is passed as ``n_estimators``; presumably it is supplied
    by a pytest parametrization that is not visible in this block.
    """
    train, _ = load_toy_cancer()
    background = Background(modes=train.modes)
    model = BoostedRDN(
        background=background,
        target="cancer",
        n_estimators=test_input,
    )
    model.fit(train)

    n_learned = len(model.estimators_)
    assert n_learned == test_input
def test_initializing_example_background_knowledge_3():
    """Check the rendered Background when modes and extra options are set."""
    train, _ = load_toy_cancer()

    # Non-default options; each one should show up in the rendered text.
    options = {
        "line_search": True,
        "recursion": True,
        "node_size": 3,
        "max_tree_depth": 4,
        "number_of_clauses": 8,
        "number_of_cycles": 10,
        "ok_if_unknown": ["smokes/1", "friends/2"],
        "bridgers": ["friends/2"],
    }
    background = Background(modes=train.modes, **options)
    assert background.modes == train.modes

    rendered = str(background)
    expected_fragments = (
        "setParam: nodeSize=3.",
        "setParam: maxTreeDepth=4.",
        "setParam: numOfCycles=10.",
        "setParam: numOfClauses=8.",
        "setParam: lineSearch=true.",
        "setParam: recursion=true.",
        "friends(+Person,-Person).",
        "friends(-Person,+Person).",
        "smokes(+Person).",
        "cancer(+Person).",
        "okIfUnknown: smokes/1.",
        "okIfUnknown: friends/2.",
        "bridger: friends/2.",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_initialize_bad_neg_pos_ratio(test_input):
    """Fitting with a bad ``neg_pos_ratio`` must raise ``ValueError``.

    ``test_input`` is the value under test; presumably it is supplied by a
    pytest parametrization that is not visible in this block.
    """
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        neg_pos_ratio=test_input,
    )
    train, _ = load_toy_cancer()

    # The error is expected from ``fit``, not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train)
def test_initialize_bad_n_estimators(test_input):
    """Fitting with a bad ``n_estimators`` must raise ``ValueError``.

    ``test_input`` is the value under test; presumably it is supplied by a
    pytest parametrization that is not visible in this block.
    """
    model = BoostedRDN(
        target="cancer",
        background=Background(),
        n_estimators=test_input,
    )
    train, _ = load_toy_cancer()

    # The error is expected from ``fit``, not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train)
def test_len_train_pos_and_neg():
    """Check the pos/neg/facts sizes of the Toy-Cancer train and test splits."""
    train, test = load_toy_cancer()

    # Sizes are compared as (pos, neg, facts) triples.
    train_sizes = (len(train.pos), len(train.neg), len(train.facts))
    test_sizes = (len(test.pos), len(test.neg), len(test.facts))

    assert train_sizes == (4, 2, 15)
    assert test_sizes == (3, 2, 13)
def test_train_database_train_objects_exist():
    """Check that ``pos``, ``neg`` and ``facts`` are set on both splits."""
    train, test = load_toy_cancer()

    for split in (train, test):
        for field in ("pos", "neg", "facts"):
            assert getattr(split, field) is not None
def test_toy_cancer_predict_after_load(test_input):
    """Load a stored ToyCancer model from JSON and check its predictions.

    ``test_input`` fills the placeholder in the JSON file name; presumably
    it is supplied by a pytest parametrization not visible in this block.
    """
    # Relative path: resolved against the current working directory.
    json_template = "srlearn/tests/regression_tests/json/toy_cancer_{0}.json"

    model = BoostedRDN()
    model.from_json(json_template.format(test_input))

    _, test = load_toy_cancer()
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(model.predict(test), expected)
def test_cannot_read_outside_length_of_dotfiles():
    """Invalid ``tree_index`` values must make ``export_digraph`` raise."""
    train, _ = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=train.modes),
    )
    model.fit(train)

    invalid_indexes = (-10, -5, -1, 10)
    for bad_index in invalid_indexes:
        with pytest.raises(IndexError):
            export_digraph(model, tree_index=bad_index)
def test_predict_proba_test_data():
    """Check ``predict_proba`` output on the test split to two decimals."""
    train, test = load_toy_cancer()
    model = BoostedRDN(
        background=Background(modes=train.modes),
        target="cancer",
        n_estimators=5,
    )
    model.fit(train)

    probabilities = model.predict_proba(test)
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])
    assert_array_almost_equal(probabilities, expected, decimal=2)
def test_feature_importances_toy_cancer():
    """Check the most common feature importance learned on Toy-Cancer."""
    train, _ = load_toy_cancer()
    model = BoostedRDN(
        target="cancer",
        background=Background(modes=train.modes),
        n_estimators=10,
    )
    model.fit(train)

    top_feature = model.feature_importances_.most_common(1)[0]
    assert top_feature == ("smokes", 10)
def test_serialize_BoostedRDN(tmpdir):
    """Write a fitted model to JSON, reload it, and predict with the copy."""
    json_path = tmpdir.join("ToyCancerRDN.json")
    train, test = load_toy_cancer()

    original = BoostedRDN(
        background=Background(modes=train.modes),
        target="cancer",
        n_estimators=5,
    )
    original.fit(train)
    original.to_json(json_path)

    # A fresh, unfitted instance gets its state only from the JSON file.
    restored = BoostedRDN()
    restored.from_json(json_path)
    predictions = restored.predict(test)

    assert len(restored.estimators_) == 5
    expected = np.array([1.0, 1.0, 1.0, 0.0, 0.0])
    assert_array_equal(predictions, expected)
def test_initialize_example_background_knowledge_1():
    """Check the rendered Background when only the modes are provided."""
    train, _ = load_toy_cancer()
    background = Background(modes=train.modes)

    assert background.modes == train.modes
    assert not background.line_search
    assert not background.recursion

    rendered = str(background)
    expected_fragments = (
        "setParam: nodeSize=2.",
        "setParam: maxTreeDepth=3.",
        "setParam: numOfCycles=100.",
        "setParam: numOfClauses=100.",
        "friends(+Person,-Person).",
        "friends(-Person,+Person).",
        "smokes(+Person).",
        "cancer(+Person).",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_initialize_bad_target(test_input):
    """Fitting with an incorrect ``target`` must raise ``ValueError``.

    ``test_input`` is the value under test; presumably it is supplied by a
    pytest parametrization that is not visible in this block.
    """
    model = BoostedRDN(target=test_input)
    train, _ = load_toy_cancer()

    # The error is expected from ``fit``, not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train)
def test_write_background_to_file_2(tmpdir):
    """Write a Background with extra parameters and compare file to ``str``."""
    train, _ = load_toy_cancer()
    background = Background(modes=train.modes, node_size=1, max_tree_depth=5)

    out_dir = pathlib.Path(tmpdir)
    background.write(filename="train", location=out_dir)

    written = tmpdir.join("train_bk.txt").read()
    assert written == str(background)
def test_train_test_exists():
    """Check that both objects returned by ``load_toy_cancer`` are set."""
    train, test = load_toy_cancer()

    for split in (train, test):
        assert split is not None
def test_initialize_bad_background(test_input):
    """Fitting with a bad ``background`` must raise ``ValueError``.

    ``test_input`` is the value under test; presumably it is supplied by a
    pytest parametrization that is not visible in this block.
    """
    model = BoostedRDN(target="cancer", background=test_input)
    train, _ = load_toy_cancer()

    # The error is expected from ``fit``, not from the constructor.
    with pytest.raises(ValueError):
        model.fit(train)
The smokes-friends-cancer example is a common first example in probabilistic relational models; here we use this set to learn a Relational Dependency Network (:class:`srlearn.rdn.BoostedRDN`). This shows how the margin between positive and negative examples is maximized as the number of iterations of boosting increases. """ from srlearn.rdn import BoostedRDN from srlearn import Background from srlearn.datasets import load_toy_cancer import numpy as np import matplotlib.pyplot as plt train, test = load_toy_cancer() bk = Background(modes=train.modes) clf = BoostedRDN( background=bk, target="cancer", max_tree_depth=2, node_size=2, n_estimators=20, ) clf.fit(train) x = np.arange(1, 21) y_pos = []