"childof(mollyweasley,ronweasley).", "childof(arthurweasley,fredweasley).", "childof(mollyweasley,fredweasley).", "childof(arthurweasley,georgeweasley).", "childof(mollyweasley,georgeweasley).", "childof(arthurweasley,ginnyweasley).", "childof(mollyweasley,ginnyweasley).", "childof(xenophiliuslovegood,lunalovegood).", "childof(cygnusblack,narcissamalfoy).", ] bk = Background( modes=[ "male(+name).", "father(+name,+name).", "childof(+name,+name).", "siblingof(+name,+name)." ], number_of_clauses=8, use_prolog_variables=True, ) clf = BoostedRDN( background=bk, target="father", n_estimators=5, ) clf.fit(train_db) test_db = Database()
# Number of repetitions, and the list that collects one entry per repetition.
_run_times = 10
_train_times = []

for _ in range(_run_times):
    # Everything is rebuilt from scratch on each pass: the database is
    # re-read from disk and a new Background/BoostedRDN pair is created.
    db = Database.from_files(
        pos="datasets/imdb/train1/train1_pos.txt",
        neg="datasets/imdb/train1/train1_neg.txt",
        facts="datasets/imdb/train1/train1_facts.txt",
    )

    bk = Background(
        modes=[
            "actor(+person).",
            "movie(+movie, +person).",
            "movie(+movie, -person).",
            "movie(-movie, +person).",
            "female_gender(+person).",
            "genre(+person, +genre).",
            "genre(+person, #genre).",
            "genre(+person, -genre).",
            "genre(-person, +genre).",
            "workedunder(+person, +person).",
            "workedunder(+person, -person).",
            "workedunder(-person, +person).",
        ],
    )

    clf = BoostedRDN(
        background=bk,
        target="workedunder",
        node_size=3,
        max_tree_depth=3,
    )

    # The clock starts only here, so the loading and setup above are not
    # part of the measured interval.
    _start = time.perf_counter()
    clf.fit(db)
This makes use of :class:`srlearn.example_data`, which provides two `srlearn.Database` objects named ``example_data.train`` and ``example_data.test``. This shows how the margin between positive and negative examples is maximized as the number of iterations of boosting increases. """ from srlearn.rdn import BoostedRDN from srlearn import Background from srlearn import example_data import numpy as np import matplotlib.pyplot as plt bk = Background( modes=example_data.train.modes, use_std_logic_variables=True, ) clf = BoostedRDN( background=bk, target='cancer', max_tree_depth=2, node_size=2, n_estimators=20, ) clf.fit(example_data.train) x = np.arange(1, 21) y_pos = [] y_neg = []
# Collects one entry per repetition of the loop below.
_train_times = []

for _ in range(_run_times):
    # Each pass re-reads the WebKB files and builds a new Background and
    # BoostedRDN, so no state is shared between repetitions.
    db = Database.from_files(
        pos="datasets/webkb/train1/train1_pos.txt",
        neg="datasets/webkb/train1/train1_neg.txt",
        facts="datasets/webkb/train1/train1_facts.txt",
    )

    bk = Background(
        modes=[
            "courseprof(-Course, +Person).",
            "courseprof(+Course, -Person).",
            "courseta(+Course, -Person).",
            "courseta(-Course, +Person).",
            "faculty(+Person).",
            "project(-Proj, +Person).",
            "project(+Proj, -Person).",
            "sameperson(-Person, +Person).",
        ],
    )

    clf = BoostedRDN(
        background=bk,
        target="faculty",
        node_size=2,
        max_tree_depth=3,
    )

    # The clock starts only here, so the loading and setup above are not
    # part of the measured interval.
    _start = time.perf_counter()
    clf.fit(db)
# and who is a ``siblingof``.

train.facts

# %%
# Our aim is to learn about what a "*father*" is in terms of the facts we have available.
# This process is usually called *induction,* and is often portrayed as "learning a
# definition of an object."

from srlearn.rdn import BoostedRDN
from srlearn import Background

# One mode declaration per predicate that appears in the facts.
# NOTE(review): ``node_size`` is passed to ``Background`` here; newer srlearn
# releases appear to accept it on ``BoostedRDN`` instead - confirm against the
# installed version.
bk = Background(
    modes=[
        "male(+name).",
        "father(+name,+name).",
        "childof(+name,+name).",
        "siblingof(+name,+name).",
    ],
    node_size=1,
    number_of_clauses=8,
)

# Fit five estimators for the ``father`` target on the training database.
clf = BoostedRDN(
    background=bk,
    target="father",
    n_estimators=5,
)
clf.fit(train)

# %%
# It's important to check whether we actually learn something useful.
# We'll visually inspect the relational regression trees to see what
train, test = load("webkb", fold=1)

# %%
# We'll set up the learning problem and fit the classifier:

from srlearn.rdn import BoostedRDN
from srlearn import Background

# Mode declarations for the WebKB predicates used in this example.
bkg = Background(
    modes=[
        "courseprof(-course,+person).",
        "courseprof(+course,-person).",
        "courseta(+course,-person).",
        "courseta(-course,+person).",
        "project(-proj,+person).",
        "project(+proj,-person).",
        "sameperson(-person,+person).",
        "faculty(+person).",
        "student(+person).",
    ],
    number_of_clauses=8,
)

# Classifier for the ``faculty`` target; it is only constructed here.
clf = BoostedRDN(
    background=bkg,
    target="faculty",
    max_tree_depth=3,
    node_size=3,
    n_estimators=10,
)
we use this set to learn a Relational Dependency Network (:class:`srlearn.rdn.BoostedRDN`). This shows how the margin between positive and negative examples is maximized as the number of iterations of boosting increases. """ from srlearn.rdn import BoostedRDN from srlearn import Background from srlearn.datasets import load_toy_cancer import numpy as np import matplotlib.pyplot as plt train, test = load_toy_cancer() bk = Background(modes=train.modes) clf = BoostedRDN( background=bk, target="cancer", max_tree_depth=2, node_size=2, n_estimators=20, ) clf.fit(train) x = np.arange(1, 21) y_pos = [] y_neg = [] thresholds = []
facts="datasets/uwcse/train1/train1_facts.txt", ) bk = Background(modes=[ "advisedby(+Person, +Person).", "courselevel(+Course, #Level).", "courselevel(+Course, +Level).", "hasposition(+Person, #Position).", "inphase(+Person, #Phase).", "professor(+Person).", "projectmember(-Project, +Person).", "projectmember(+Project, -Person).", "publication(-Title, +Person).", "publication(+Title, -Person).", "samecourse(+Course, +Course).", "sameperson(+Person, +Person).", "sameproject(+Project, +Project).", "student(+Person).", "ta(-Course, +Person, -Quarter).", "ta(+Course, -Person, +Quarter).", "ta(+Course, +Person, -Quarter).", "taughtby(-Course, +Person, -Quarter).", "taughtby(+Course, -Person, +Quarter).", "taughtby(+Course, +Person, -Quarter).", "tempadvisedby(-Person, +Person).", "tempadvisedby(+Person, -Person).", "yearsinprogram(+Person, #Integer).", ], ) clf = BoostedRDN( background=bk,