Ejemplo n.º 1
0
    "childof(mollyweasley,ronweasley).",
    "childof(arthurweasley,fredweasley).",
    "childof(mollyweasley,fredweasley).",
    "childof(arthurweasley,georgeweasley).",
    "childof(mollyweasley,georgeweasley).",
    "childof(arthurweasley,ginnyweasley).",
    "childof(mollyweasley,ginnyweasley).",
    "childof(xenophiliuslovegood,lunalovegood).",
    "childof(cygnusblack,narcissamalfoy).",
]

# Background knowledge: four mode strings (male, father, childof, siblingof),
# with every argument typed as `name`.
bk = Background(
    use_prolog_variables=True,
    number_of_clauses=8,
    modes=[
        "male(+name).", "father(+name,+name).",
        "childof(+name,+name).", "siblingof(+name,+name).",
    ],
)

# Five boosted estimators for the `father` target, fitted on `train_db`.
clf = BoostedRDN(target="father", n_estimators=5, background=bk)
clf.fit(train_db)

# Separate Database instance for the test split; presumably populated by the
# statements further down -- verify.
test_db = Database()
Ejemplo n.º 2
0
# Number of times the load/build/fit cycle below is repeated.
_run_times = 10

# Presumably collects one timing per run; nothing is appended to it in the
# lines visible here -- verify against the rest of the script.
_train_times = []

for _ in range(_run_times):

    # Re-read the IMDB train1 positives, negatives, and facts on every run.
    db = Database.from_files(
        pos="datasets/imdb/train1/train1_pos.txt",
        neg="datasets/imdb/train1/train1_neg.txt",
        facts="datasets/imdb/train1/train1_facts.txt",
    )

    # Mode strings for the IMDB predicates (actor, movie, female_gender,
    # genre, workedunder).
    # NOTE(review): `+`/`-`/`#` presumably follow the usual BoostSRL mode
    # convention (input / output / constant) -- confirm in the srlearn docs.
    bk = Background(modes=[
        "actor(+person).", "movie(+movie, +person).",
        "movie(+movie, -person).", "movie(-movie, +person).",
        "female_gender(+person).", "genre(+person, +genre).",
        "genre(+person, #genre).", "genre(+person, -genre).",
        "genre(-person, +genre).", "workedunder(+person, +person).",
        "workedunder(+person, -person).", "workedunder(-person, +person)."
    ], )

    # A fresh classifier per run, targeting `workedunder`.
    clf = BoostedRDN(
        background=bk,
        target="workedunder",
        node_size=3,
        max_tree_depth=3,
    )

    # Timestamp taken right before fitting, so the data loading and model
    # construction above fall outside the timed region.
    _start = time.perf_counter()

    clf.fit(db)
This makes use of :mod:`srlearn.example_data`, which provides two :class:`srlearn.Database` objects named
``example_data.train`` and ``example_data.test``.

This shows how the margin between positive and negative examples is maximized as the number of
iterations of boosting increases.
"""

from srlearn.rdn import BoostedRDN
from srlearn import Background
from srlearn import example_data

import numpy as np
import matplotlib.pyplot as plt

# Background knowledge reuses the modes stored on the example training data.
bk = Background(use_std_logic_variables=True, modes=example_data.train.modes)

# 20 boosted estimators for the `cancer` target, each tree limited to depth 2.
clf = BoostedRDN(
    target="cancer",
    n_estimators=20,
    node_size=2,
    max_tree_depth=2,
    background=bk,
)
clf.fit(example_data.train)

# x holds the integers 1..20, the same count as n_estimators above.
x = np.arange(1, 21)
y_pos, y_neg = [], []
Ejemplo n.º 4
0
# Presumably collects one timing per run; nothing is appended to it in the
# lines visible here -- verify against the rest of the script.
_train_times = []

# `_run_times` is defined outside the lines visible here.
for _ in range(_run_times):

    # Re-read the WebKB train1 positives, negatives, and facts on every run.
    db = Database.from_files(
        pos="datasets/webkb/train1/train1_pos.txt",
        neg="datasets/webkb/train1/train1_neg.txt",
        facts="datasets/webkb/train1/train1_facts.txt",
    )

    # Mode strings for the WebKB predicates (courseprof, courseta, faculty,
    # project, sameperson).
    # NOTE(review): `+`/`-` presumably follow the usual BoostSRL mode
    # convention (input / output) -- confirm in the srlearn docs.
    bk = Background(modes=[
        "courseprof(-Course, +Person).",
        "courseprof(+Course, -Person).",
        "courseta(+Course, -Person).",
        "courseta(-Course, +Person).",
        "faculty(+Person).",
        "project(-Proj, +Person).",
        "project(+Proj, -Person).",
        "sameperson(-Person, +Person).",
    ], )

    # A fresh classifier per run, targeting `faculty`.
    clf = BoostedRDN(
        background=bk,
        target="faculty",
        node_size=2,
        max_tree_depth=3,
    )

    # Timestamp taken right before fitting, so the data loading and model
    # construction above fall outside the timed region.
    _start = time.perf_counter()

    clf.fit(db)
# and who is a ``siblingof``.

train.facts

# %%
# Our aim is to learn what a "*father*" is in terms of the facts we have available.
# This process is usually called *induction*, and is often described as "learning a
# definition of an object."

from srlearn.rdn import BoostedRDN
from srlearn import Background

# Background knowledge: four mode strings (male, father, childof, siblingof),
# with every argument typed as `name`.
# NOTE(review): `node_size` is passed to Background here, while other srlearn
# examples pass it to BoostedRDN -- confirm which one the installed version
# expects.
bk = Background(
    number_of_clauses=8,
    node_size=1,
    modes=[
        "male(+name).",
        "father(+name,+name).",
        "childof(+name,+name).",
        "siblingof(+name,+name).",
    ],
)

# Five boosted estimators for the `father` target, fitted on `train`.
clf = BoostedRDN(n_estimators=5, target="father", background=bk)
clf.fit(train)

# %%
# It's important to check whether we actually learn something useful.
# We'll visually inspect the relational regression trees to see what
train, test = load("webkb", fold=1)

# %%
# We'll set up the learning problem and fit the classifier:

from srlearn.rdn import BoostedRDN
from srlearn import Background

# Background knowledge: mode strings for the WebKB predicates (courseprof,
# courseta, project, sameperson, faculty, student).
bkg = Background(
    number_of_clauses=8,
    modes=[
        "courseprof(-course,+person).", "courseprof(+course,-person).",
        "courseta(+course,-person).", "courseta(-course,+person).",
        "project(-proj,+person).", "project(+proj,-person).",
        "sameperson(-person,+person).",
        "faculty(+person).", "student(+person).",
    ],
)

# Ten boosted estimators for the `faculty` target; the classifier is only
# constructed here, not fitted.
clf = BoostedRDN(
    target="faculty",
    n_estimators=10,
    node_size=3,
    max_tree_depth=3,
    background=bkg,
)
we use this set to learn a Relational Dependency Network (:class:`srlearn.rdn.BoostedRDN`).

This shows how the margin between positive and negative examples is maximized as the number of
iterations of boosting increases.
"""

from srlearn.rdn import BoostedRDN
from srlearn import Background
from srlearn.datasets import load_toy_cancer

import numpy as np
import matplotlib.pyplot as plt

# load_toy_cancer() yields the training and test data as a pair.
train, test = load_toy_cancer()

# Background knowledge reuses the modes stored on the training data.
bk = Background(modes=train.modes)

# 20 boosted estimators for the `cancer` target, each tree limited to depth 2.
clf = BoostedRDN(
    target="cancer",
    n_estimators=20,
    node_size=2,
    max_tree_depth=2,
    background=bk,
)
clf.fit(train)

# x holds the integers 1..20, the same count as n_estimators above.
x = np.arange(1, 21)
y_pos, y_neg, thresholds = [], [], []
Ejemplo n.º 8
0
        facts="datasets/uwcse/train1/train1_facts.txt",
    )

    # Mode strings for the UW-CSE predicates, one entry per allowed
    # predicate/argument pattern (several predicates appear in more than one
    # pattern).
    # NOTE(review): `+`/`-`/`#` presumably follow the usual BoostSRL mode
    # convention (input / output / constant) -- confirm in the srlearn docs.
    bk = Background(modes=[
        "advisedby(+Person, +Person).",
        "courselevel(+Course, #Level).",
        "courselevel(+Course, +Level).",
        "hasposition(+Person, #Position).",
        "inphase(+Person, #Phase).",
        "professor(+Person).",
        "projectmember(-Project, +Person).",
        "projectmember(+Project, -Person).",
        "publication(-Title, +Person).",
        "publication(+Title, -Person).",
        "samecourse(+Course, +Course).",
        "sameperson(+Person, +Person).",
        "sameproject(+Project, +Project).",
        "student(+Person).",
        "ta(-Course, +Person, -Quarter).",
        "ta(+Course, -Person, +Quarter).",
        "ta(+Course, +Person, -Quarter).",
        "taughtby(-Course, +Person, -Quarter).",
        "taughtby(+Course, -Person, +Quarter).",
        "taughtby(+Course, +Person, -Quarter).",
        "tempadvisedby(-Person, +Person).",
        "tempadvisedby(+Person, -Person).",
        "yearsinprogram(+Person, #Integer).",
    ], )

    clf = BoostedRDN(
        background=bk,