def test_opposite():
    """Check the operator produced by ``opposite()`` for every operator kind.

    Expectations encoded below, for operators built with delta ``1`` on a
    two-node ``SemiparametricBN``:

    * ``AddArc(a, b)``    -> ``RemoveArc`` with source ``a``, target ``b``.
    * ``RemoveArc(a, b)`` -> ``AddArc`` with source ``a``, target ``b``.
    * ``FlipArc(a, b)``   -> ``FlipArc`` with source ``b``, target ``a``.
    * ``ChangeNodeType(a, CKDEType)`` -> ``ChangeNodeType`` carrying the type
      the node had in the network (``LinearGaussianCPDType`` here).

    In every case the opposite operator reports a delta of ``-1``.
    """
    bn = pbn.SemiparametricBN(["a", "b"])

    # (operator class, class of its opposite, opposite's source, opposite's target)
    arc_cases = [
        (pbn.AddArc, pbn.RemoveArc, 'a', 'b'),
        (pbn.RemoveArc, pbn.AddArc, 'a', 'b'),
        (pbn.FlipArc, pbn.FlipArc, 'b', 'a'),
    ]
    for op_cls, opposite_cls, expected_source, expected_target in arc_cases:
        o = op_cls("a", "b", 1)
        oppo = o.opposite(bn)

        assert oppo.source() == expected_source
        assert oppo.target() == expected_target
        assert oppo.delta() == -1
        # Exact-type check on purpose: identity comparison of the type object,
        # so a related operator class cannot satisfy the assertion.
        assert type(oppo) is opposite_cls

    # The opposite of a node-type change must restore the node's current type,
    # so the type is fixed in the network before asking for the opposite.
    bn.set_node_type("a", pbn.LinearGaussianCPDType())
    o = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    oppo = o.opposite(bn)

    assert oppo.node() == 'a'
    assert oppo.node_type() == pbn.LinearGaussianCPDType()
    assert oppo.delta() == -1
    assert type(oppo) is pbn.ChangeNodeType
def test_holdout_local_score_null_spbn():
    """Compare holdout local scores with the reference when data holds NaNs.

    A copy of the module-level ``df`` gets NaNs written into columns ``a``
    to ``d`` at randomly drawn row positions (100 draws per column, NumPy
    seed 0).  ``HoldoutLikelihood.local_score`` on that data is then checked
    against ``numpy_local_score`` (defined outside this block), which is fed
    the training and test partitions exposed by the score object.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    np.random.seed(0)
    # Draw every column's row positions first, in a, b, c, d order, so the
    # random stream is consumed in a fixed sequence before the data is touched.
    null_rows = {column: np.random.randint(0, SIZE, size=100)
                 for column in ('a', 'b', 'c', 'd')}

    df_null = df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    hl = pbn.HoldoutLikelihood(df_null, 0.2, seed)

    # (variable, parents given to the score, node type class,
    #  parents given to the reference)
    cases = [
        ('a', [], pbn.CKDEType, []),
        ('b', ['a'], pbn.LinearGaussianCPDType, ['a']),
        ('c', ['a', 'b'], pbn.CKDEType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['a', 'b', 'c']),
        # Same node, reference evaluated with the parents in another order.
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['b', 'c', 'a']),
    ]
    for variable, parents, type_cls, reference_parents in cases:
        score = hl.local_score(spbn, variable, parents)
        reference = numpy_local_score(type_cls(),
                                      hl.training_data().to_pandas(),
                                      hl.test_data().to_pandas(),
                                      variable, reference_parents)
        assert np.isclose(score, reference)

    # Omitting the parents must give exactly the score obtained with the
    # parent set stored in the network.
    for variable in ('a', 'b', 'c', 'd'):
        assert hl.local_score(spbn, variable) == \
            hl.local_score(spbn, variable, spbn.parents(variable))
def test_cvl_local_score_spbn():
    """Compare cross-validated local scores of a semiparametric network.

    ``CVLikelihood.local_score`` and ``local_score_node_type`` (10 folds,
    module-level ``seed``) are checked against ``numpy_local_score``, a
    reference defined outside this block, on the module-level ``df``.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    cvl = pbn.CVLikelihood(df, 10, seed)

    # (variable, parents given to the score, node type class used by the
    #  reference, parents given to the reference)
    network_type_cases = [
        ('a', [], pbn.CKDEType, []),
        ('b', ['a'], pbn.LinearGaussianCPDType, ['a']),
        ('c', ['a', 'b'], pbn.CKDEType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['a', 'b', 'c']),
        # Same node, reference evaluated with the parents in another order.
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['b', 'c', 'a']),
    ]
    for variable, parents, type_cls, reference_parents in network_type_cases:
        score = cvl.local_score(spbn, variable, parents)
        reference = numpy_local_score(type_cls(), df, variable,
                                      reference_parents)
        assert np.isclose(score, reference)

    # Omitting the parents must give exactly the score obtained with the
    # parent set stored in the network.
    for variable in ('a', 'b', 'c', 'd'):
        assert cvl.local_score(spbn, variable) == \
            cvl.local_score(spbn, variable, spbn.parents(variable))

    # Here the node type is passed explicitly and differs from the type each
    # node was given when the network was built.
    explicit_type_cases = [
        ('a', [], pbn.LinearGaussianCPDType, []),
        ('b', ['a'], pbn.CKDEType, ['a']),
        ('c', ['a', 'b'], pbn.LinearGaussianCPDType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.CKDEType, ['a', 'b', 'c']),
        ('d', ['a', 'b', 'c'], pbn.CKDEType, ['b', 'c', 'a']),
    ]
    for variable, parents, type_cls, reference_parents in explicit_type_cases:
        score = cvl.local_score_node_type(spbn, type_cls(), variable, parents)
        reference = numpy_local_score(type_cls(), df, variable,
                                      reference_parents)
        assert np.isclose(score, reference)
def test_apply():
    """Apply each operator to a Gaussian and a semiparametric network.

    Both networks go through the same arc sequence: add ``a -> b``, flip it
    to ``b -> a``, then remove it.  ``ChangeNodeType`` is expected to raise
    ``ValueError`` on the ``GaussianNetwork`` and to switch the node from
    ``UnknownFactorType`` to ``CKDEType`` on the ``SemiparametricBN``.
    """

    def check_arc_sequence(model):
        """Add, flip and remove the arc between a and b, checking each step."""
        assert not model.has_arc('a', 'b')

        add_op = pbn.AddArc("a", "b", 1)
        add_op.apply(model)
        assert model.num_arcs() == 1
        assert model.has_arc('a', 'b')

        flip_op = pbn.FlipArc("a", "b", 1)
        flip_op.apply(model)
        assert model.num_arcs() == 1
        assert not model.has_arc('a', 'b')
        assert model.has_arc('b', 'a')

        remove_op = pbn.RemoveArc("b", "a", 1)
        remove_op.apply(model)
        assert model.num_arcs() == 0
        assert not model.has_arc('b', 'a')

    nodes = ['a', 'b', 'c', 'd']

    gbn = pbn.GaussianNetwork(nodes)
    assert gbn.num_arcs() == 0
    check_arc_sequence(gbn)

    change_type = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    with pytest.raises(ValueError) as ex:
        change_type.apply(gbn)
    # The message is inspected after the block: code following the raising
    # call inside the ``with`` body would never execute.
    assert "Wrong factor type" in str(ex.value)

    spbn = pbn.SemiparametricBN(nodes)
    assert spbn.num_arcs() == 0

    change_type = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    assert spbn.node_type('a') == pbn.UnknownFactorType()
    change_type.apply(spbn)
    assert spbn.node_type('a') == pbn.CKDEType()

    check_arc_sequence(spbn)
def test_holdout_score():
    """Check that the holdout score equals the sum of the local scores.

    Verified for a fully connected ``GaussianNetwork`` (parents passed
    explicitly) and for a ``SemiparametricBN`` with the same arcs (parents
    taken from the network), both over nodes ``a`` to ``d``.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]

    gbn = pbn.GaussianNetwork(arcs)
    hl = pbn.HoldoutLikelihood(df, 0.2, 0)

    gbn_total = hl.score(gbn)
    gbn_a = hl.local_score(gbn, 'a', [])
    gbn_b = hl.local_score(gbn, 'b', ['a'])
    gbn_c = hl.local_score(gbn, 'c', ['a', 'b'])
    gbn_d = hl.local_score(gbn, 'd', ['a', 'b', 'c'])
    # Plain left-to-right addition keeps the floating-point result unchanged.
    assert np.isclose(gbn_total, gbn_a + gbn_b + gbn_c + gbn_d)

    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    spbn_total = hl.score(spbn)
    spbn_a = hl.local_score(spbn, 'a')
    spbn_b = hl.local_score(spbn, 'b')
    spbn_c = hl.local_score(spbn, 'c')
    spbn_d = hl.local_score(spbn, 'd')
    assert np.isclose(spbn_total, spbn_a + spbn_b + spbn_c + spbn_d)
def test_cvl_score():
    """Check that the cross-validated score equals the sum of local scores.

    Verified for a fully connected ``GaussianNetwork`` (parents passed
    explicitly) and for a ``SemiparametricBN`` with the same arcs (parents
    taken from the network), using a 10-fold ``CVLikelihood`` with seed 0.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]

    gbn = pbn.GaussianNetwork(arcs)
    cv = pbn.CVLikelihood(df, 10, 0)

    gbn_total = cv.score(gbn)
    gbn_a = cv.local_score(gbn, 'a', [])
    gbn_b = cv.local_score(gbn, 'b', ['a'])
    gbn_c = cv.local_score(gbn, 'c', ['a', 'b'])
    gbn_d = cv.local_score(gbn, 'd', ['a', 'b', 'c'])
    # Plain left-to-right addition keeps the floating-point result unchanged.
    assert np.isclose(gbn_total, gbn_a + gbn_b + gbn_c + gbn_d)

    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    spbn_total = cv.score(spbn)
    spbn_a = cv.local_score(spbn, 'a')
    spbn_b = cv.local_score(spbn, 'b')
    spbn_c = cv.local_score(spbn, 'c')
    spbn_d = cv.local_score(spbn, 'd')
    assert np.isclose(spbn_total, spbn_a + spbn_b + spbn_c + spbn_d)
def test_holdout_local_score_spbn():
    """Compare holdout local scores of a semiparametric network.

    ``HoldoutLikelihood.local_score`` (test ratio 0.2, module-level ``seed``)
    is checked against ``numpy_local_score``, a reference defined outside
    this block, which is fed the training and test partitions exposed by the
    score object.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    hl = pbn.HoldoutLikelihood(df, 0.2, seed)

    # (variable, parents given to the score, node type class,
    #  parents given to the reference)
    cases = [
        ('a', [], pbn.CKDEType, []),
        ('b', ['a'], pbn.LinearGaussianCPDType, ['a']),
        ('c', ['a', 'b'], pbn.CKDEType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['a', 'b', 'c']),
        # Same node, reference evaluated with the parents in another order.
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['b', 'c', 'a']),
    ]
    for variable, parents, type_cls, reference_parents in cases:
        score = hl.local_score(spbn, variable, parents)
        reference = numpy_local_score(type_cls(),
                                      hl.training_data().to_pandas(),
                                      hl.test_data().to_pandas(),
                                      variable, reference_parents)
        assert np.isclose(score, reference)

    # Omitting the parents must give exactly the score obtained with the
    # parent set stored in the network.
    for variable in ('a', 'b', 'c', 'd'):
        assert hl.local_score(spbn, variable) == \
            hl.local_score(spbn, variable, spbn.parents(variable))
def test_find_max():
    """Check that ``OperatorPool.find_max`` agrees with its operator sets.

    The pool's best operator must equal whichever of the two per-set best
    operators has the larger delta; on a tie the arc operator is expected.
    """
    spbn = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    cv = pbn.CVLikelihood(df)

    arc_set = pbn.ArcOperatorSet()
    type_set = pbn.ChangeNodeTypeSet()

    # Call order is kept as is: arc scores are cached first, then the unknown
    # node types are set from the data, then node-type scores are cached.
    arc_set.cache_scores(spbn, cv)
    spbn.set_unknown_node_types(df)
    type_set.cache_scores(spbn, cv)

    best_arc = arc_set.find_max(spbn)
    best_type = type_set.find_max(spbn)

    pool = pbn.OperatorPool([arc_set, type_set])
    pool.cache_scores(spbn, cv)
    best_overall = pool.find_max(spbn)

    expected = best_arc if best_arc.delta() >= best_type.delta() else best_type
    assert best_overall == expected
def test_cvl_local_score_null_spbn():
    """Compare cross-validated local scores when the data holds NaNs.

    A copy of the module-level ``df`` gets NaNs written into columns ``a``
    to ``d`` at randomly drawn row positions (100 draws per column, NumPy
    seed 0).  ``CVLikelihood.local_score`` and ``local_score_node_type``
    (10 folds, module-level ``seed``) are then checked against
    ``numpy_local_score``, a reference defined outside this block.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                       ('c', pbn.CKDEType())])

    np.random.seed(0)
    # Draw every column's row positions first, in a, b, c, d order, so the
    # random stream is consumed in a fixed sequence before the data is touched.
    null_rows = {column: np.random.randint(0, SIZE, size=100)
                 for column in ('a', 'b', 'c', 'd')}

    df_null = df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    cvl = pbn.CVLikelihood(df_null, 10, seed)

    # (variable, parents given to the score, node type class used by the
    #  reference, parents given to the reference)
    network_type_cases = [
        ('a', [], pbn.CKDEType, []),
        ('b', ['a'], pbn.LinearGaussianCPDType, ['a']),
        ('c', ['a', 'b'], pbn.CKDEType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['a', 'b', 'c']),
        # Same node, reference evaluated with the parents in another order.
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['b', 'c', 'a']),
    ]
    for variable, parents, type_cls, reference_parents in network_type_cases:
        score = cvl.local_score(spbn, variable, parents)
        reference = numpy_local_score(type_cls(), df_null, variable,
                                      reference_parents)
        assert np.isclose(score, reference)

    # Omitting the parents must give exactly the score obtained with the
    # parent set stored in the network.
    for variable in ('a', 'b', 'c', 'd'):
        assert cvl.local_score(spbn, variable) == \
            cvl.local_score(spbn, variable, spbn.parents(variable))

    # Here the node type is passed explicitly and differs from the type each
    # node was given when the network was built.
    explicit_type_cases = [
        ('a', [], pbn.LinearGaussianCPDType, []),
        ('b', ['a'], pbn.CKDEType, ['a']),
        ('c', ['a', 'b'], pbn.LinearGaussianCPDType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.CKDEType, ['a', 'b', 'c']),
        ('d', ['a', 'b', 'c'], pbn.CKDEType, ['b', 'c', 'a']),
    ]
    for variable, parents, type_cls, reference_parents in explicit_type_cases:
        score = cvl.local_score_node_type(spbn, type_cls(), variable, parents)
        reference = numpy_local_score(type_cls(), df_null, variable,
                                      reference_parents)
        assert np.isclose(score, reference)