def test_predict_proba_test_data():
    """Check predict_proba() on the example test data against reference values.

    A five-estimator ``BoostedRDN`` targeting ``cancer`` is fit on
    ``example_data.train``; its probabilities for ``example_data.test`` must
    agree with the reference array to two decimal places.
    """
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])

    background = Background(
        modes=example_data.train.modes,
        use_std_logic_variables=True,
    )
    model = BoostedRDN(background=background, target="cancer", n_estimators=5)
    model.fit(example_data.train)

    predicted = model.predict_proba(example_data.test)
    assert_array_almost_equal(predicted, expected, decimal=2)
def test_predict_proba_test_data():
    """Check predict_proba() on the loaded test split against reference values.

    The train/test pair comes from ``load_toy_cancer()``. A five-estimator
    ``BoostedRDN`` targeting ``cancer`` is fit on the training split, and its
    probabilities for the test split must agree with the reference array to
    two decimal places.
    """
    expected = np.array([0.74, 0.74, 0.74, 0.25, 0.25])

    train_db, test_db = load_toy_cancer()
    background = Background(modes=train_db.modes)

    model = BoostedRDN(background=background, target="cancer", n_estimators=5)
    model.fit(train_db)

    predicted = model.predict_proba(test_db)
    assert_array_almost_equal(predicted, expected, decimal=2)
# Hand-assembled database of father/2 examples and supporting facts.
test_db = Database()

# Examples stored on the ``pos`` attribute.
test_db.pos = [
    "father(elizabeth,mrbennet).",
    "father(jane,mrbennet).",
    "father(charlotte,mrlucas).",
]

# Examples stored on the ``neg`` attribute.
test_db.neg = [
    "father(charlotte,mrsbennet).",
    "father(jane,mrlucas).",
    "father(mrsbennet,mrbennet).",
    "father(jane,elizabeth).",
]

# Background facts: male/1, childof/2 and siblingof/2.
test_db.facts = [
    "male(mrbennet).",
    "male(mrlucas).",
    "male(darcy).",
    "childof(mrbennet,elizabeth).",
    "childof(mrsbennet,elizabeth).",
    "childof(mrbennet,jane).",
    "childof(mrsbennet,jane).",
    "childof(mrlucas,charlotte).",
    "childof(mrslucas,charlotte).",
    "siblingof(jane,elizabeth).",
    "siblingof(elizabeth,jane).",
]

# ``clf`` is defined outside this excerpt; print what it predicts for test_db.
test_db_probabilities = clf.predict_proba(test_db)
print(test_db_probabilities)
# NOTE(review): this excerpt opens partway through a call; the callee and any
# earlier arguments are outside the visible source, so the keyword arguments
# below are reproduced unchanged.
    target='cancer',
    max_tree_depth=2,
    node_size=2,
    n_estimators=20,
)

clf.fit(example_data.train)

# Ensemble sizes to evaluate: 1 through 20.
x = np.arange(1, 21)
y_pos = []
y_neg = []
thresholds = []

for n_trees in x:
    # Only n_estimators is changed on the existing classifier; fit() is not
    # called again inside this loop.
    clf.set_params(n_estimators=n_trees)
    probs = clf.predict_proba(example_data.test)

    # threshold_ is read after each predict_proba() call and plotted as "Margin".
    thresholds.append(clf.threshold_)
    # Mean probability where classes_ is nonzero, and where it equals 0.
    # NOTE(review): assumes clf.classes_ aligns index-for-index with probs — confirm.
    y_pos.append(np.mean(probs[np.nonzero(clf.classes_)]))
    y_neg.append(np.mean(probs[clf.classes_ == 0]))

# Convert the collected lists to arrays before plotting.
thresholds = np.array(thresholds)
y_pos = np.array(y_pos)
y_neg = np.array(y_neg)

# One curve per series, all against the number of trees.
plt.plot(x, y_pos, "b-", label="Mean Probability of positive examples")
plt.plot(x, y_neg, "r-", label="Mean Probability of negative examples")
plt.plot(x, thresholds, "k--", label="Margin")

plt.title("Class Probability vs. Number Trees")
plt.xlabel("Number of Trees")
plt.ylabel("Probability of belonging to Positive Class")
plot_digraph(export_digraph(clf, 0), format="html") # %% # There is some variance between runs, but in the concept that the # trees pick up on is roughly that "*A father has a child and is male.*" plot_digraph(export_digraph(clf, 1), format="html") # %% # Here the data is fairly complete, and the concept that "*A father has a # child and is male*" seems sufficient for the purposes of this data. # Let's apply our learned model to the test data, which includes facts # about characters from Jane Austen's *Pride and Prejudice.* predictions = clf.predict_proba(test) print("{:<35} {}".format("Predicate", "Probability of being True"), "\n", "-" * 60) for predicate, prob in zip(test.pos + test.neg, predictions): print("{:<35} {:.2f}".format(predicate, prob)) # %% # The confidence might be a little low, which is a good excuse to mention # one of the hyperparameters. "Node Size," or ``node_size`` corresponds to # the maximum number of predicates that can be used as a split in the # dependency network. We set ``node_size=1`` above for demonstration, but the # concept that seems to be learned: ``father(A, B) = [childof(B, A), male(B)]`` # is of size 2. # # We might be able to learn a better model by taking this new information
# NOTE(review): this excerpt opens partway through a call; the callee and any
# earlier arguments are outside the visible source, so the keyword arguments
# below are reproduced unchanged.
    target="cancer",
    max_tree_depth=2,
    node_size=2,
    n_estimators=20,
)

clf.fit(train)

# Ensemble sizes to evaluate: 1 through 20.
x = np.arange(1, 21)
y_pos = []
y_neg = []
thresholds = []

for n_trees in x:
    # Only n_estimators is changed on the existing classifier; fit() is not
    # called again inside this loop.
    clf.set_params(n_estimators=n_trees)
    probs = clf.predict_proba(test)

    # threshold_ is read after each predict_proba() call and plotted as "Margin".
    thresholds.append(clf.threshold_)
    # Mean probability where classes_ is nonzero, and where it equals 0.
    # NOTE(review): assumes clf.classes_ aligns index-for-index with probs — confirm.
    y_pos.append(np.mean(probs[np.nonzero(clf.classes_)]))
    y_neg.append(np.mean(probs[clf.classes_ == 0]))

# Convert the collected lists to arrays before plotting.
thresholds = np.array(thresholds)
y_pos = np.array(y_pos)
y_neg = np.array(y_neg)

# One curve per series, all against the number of trees.
plt.plot(x, y_pos, "b-", label="Mean Probability of positive examples")
plt.plot(x, y_neg, "r-", label="Mean Probability of negative examples")
plt.plot(x, thresholds, "k--", label="Margin")

plt.title("Class Probability vs. Number Trees")
plt.xlabel("Number of Trees")
plt.ylabel("Probability of belonging to Positive Class")