Python from_pandas Beispiele, causalnex.structure.notears.from_pandas Python Beispiele

Beispiel #1

0

Datei anzeigen

    def test_tabu_edges_on_non_existing_edges_do_nothing(self, train_data_idx):
        """Tabu edges absent from the unconstrained network must leave the learned edges unchanged."""

        threshold = 0.3
        absent_edges = [("a", "d"), ("e", "a")]

        unconstrained = from_pandas(train_data_idx, w_threshold=threshold)
        constrained = from_pandas(
            train_data_idx, w_threshold=threshold, tabu_edges=absent_edges
        )

        assert set(unconstrained.edges) == set(constrained.edges)

Beispiel #2

0

Datei anzeigen

    def test_empty_data_raises_error(self):
        """
        An empty data set must be rejected with a ValueError.

        Failing early gives callers something to catch and handle, instead of a
        misleading division-by-zero or unpacking error.
        """

        empty_frame = pd.DataFrame(data=[], columns=["a"])
        with pytest.raises(ValueError):
            from_pandas(empty_frame)

Beispiel #3

0

Datei anzeigen

    def test_single_iter_gets_converged_fail_warnings(self, train_data_idx):
        """
        Capping structure learning at one iteration should emit the convergence UserWarning.
        """

        expected_message = "Failed to converge. Consider increasing max_iter."
        with pytest.warns(UserWarning, match=expected_message):
            from_pandas(train_data_idx, max_iter=1)

Beispiel #4

0

Datei anzeigen

 def test_array_with_nan_raises_error(self):
     """
     Data containing NaN must be rejected with a ValueError that mentions NaN.

     Failing early lets callers handle the problem gracefully instead of
     ending up with an empty structure.
     """
     nan_message = "Input contains NaN, infinity or a value too large for dtype*"
     frame_with_nan = pd.DataFrame(data=[np.nan, 0], columns=["a"])
     with pytest.raises(ValueError, match=nan_message):
         from_pandas(frame_with_nan)

Beispiel #5

0

Datei anzeigen

    def test_no_cycles(self, train_data_idx):
        """
        Structure learning must return a directed acyclic graph.
        """

        learned = from_pandas(train_data_idx, w_threshold=0.3)
        is_dag = nx.algorithms.is_directed_acyclic_graph(learned)
        assert is_dag

Beispiel #6

0

Datei anzeigen

    def test_certain_relationships_get_near_certain_weight(self):
        """When every observation agrees on (a, b), the selected edge weight should lie in [0.99, 1]."""

        rows = [[0, 1]] * 10
        observations = pd.DataFrame(rows, columns=["a", "b"])
        learned = from_pandas(observations)

        # NOTE(review): the filter compares node ids with 0 and 1 although the
        # columns are named "a" and "b" - confirm it selects the intended edge.
        selected_weights = [
            w
            for src, dst, w in learned.edges(data="weight")
            if src == 0 and dst == 1
        ]
        assert all(0.99 <= w <= 1 for w in selected_weights)

Beispiel #7

0

Datei anzeigen

    def test_behaves_same_as_seperate_calls(self, train_data_idx, train_data_discrete):
        """Fitting node states and CPDs in one call must match fitting them in two chained calls."""
        two_step = BayesianNetwork(from_pandas(train_data_idx, w_threshold=0.3))
        one_step = BayesianNetwork(from_pandas(train_data_idx, w_threshold=0.3))

        two_step.fit_node_states(train_data_discrete).fit_cpds(train_data_discrete)
        one_step.fit_node_states_and_cpds(train_data_discrete)

        assert two_step.edges == one_step.edges
        assert two_step.node_states == one_step.node_states

        two_step_cpds, one_step_cpds = two_step.cpds, one_step.cpds
        assert two_step_cpds.keys() == one_step_cpds.keys()

        # Compare the tables node by node.
        for node in two_step_cpds:
            assert two_step_cpds[node].equals(one_step_cpds[node])

Beispiel #8

0

Datei anzeigen

    def test_inverse_relationships_get_negative_weight(self):
        """If observations indicate a==!b and b==!a then the weight of the relationship from a->b should be negative"""

        # Build both halves with a single concat. DataFrame.append returns a new
        # frame (and was removed in pandas 2.0), so the original call discarded
        # the [1, 0] rows and only the [0, 1] observations were ever used.
        data = pd.concat(
            [
                pd.DataFrame([[0, 1] for _ in range(10)], columns=["a", "b"]),
                pd.DataFrame([[1, 0] for _ in range(10)], columns=["a", "b"]),
            ],
            ignore_index=True,
        )
        g = from_pandas(data)
        # NOTE(review): the filter compares node ids with 0 and 1 although the
        # columns are named "a" and "b" - confirm it selects the intended edge.
        assert all(weight < 0 for u, v, weight in g.edges(data="weight")
                   if u == 0 and v == 1)

Beispiel #9

0

Datei anzeigen

Datei: test_metrics.py Projekt: zeta1999/causalnex

    def test_report_ignores_unrequired_columns_in_data(self, train_data_idx,
                                                       train_data_discrete,
                                                       test_data_c_discrete):
        """Columns that predict does not need must not break the classification report."""

        structure = from_pandas(train_data_idx, w_threshold=0.3)
        network = BayesianNetwork(structure).fit_node_states(train_data_discrete)

        # The extra column is added after the node states are fitted and before the CPDs.
        train_data_discrete["NEW_COL"] = [1] * len(train_data_discrete)
        network.fit_cpds(train_data_discrete)

        classification_report(network, test_data_c_discrete, "c")

Beispiel #10

0

Datei anzeigen

    def test_query_when_cpds_not_fit(self, train_data_idx, train_data_discrete):
        """Building an InferenceEngine before the CPDs are fitted must raise a ValueError."""

        structure = from_pandas(train_data_idx, w_threshold=0.3)
        network = BayesianNetwork(structure).fit_node_states(train_data_discrete)

        missing_cpds = r"Bayesian Network does not contain any CPDs.*"
        with pytest.raises(ValueError, match=missing_cpds):
            InferenceEngine(network)

Beispiel #11

0

Datei anzeigen

    def test_multiple_tabu(self, train_data_idx):
        """Any edge related to tabu edges/parent nodes/child nodes should not exist in the network"""

        tabu_e = [("d", "a"), ("b", "c")]
        tabu_p = ["b"]
        tabu_c = ["a", "d"]
        g = from_pandas(
            train_data_idx,
            tabu_edges=tabu_e,
            tabu_parent_nodes=tabu_p,
            tabu_child_nodes=tabu_c,
        )

        # Collect edge sources and targets once instead of rebuilding the list
        # for every tabu node.
        parents = {e[0] for e in g.edges}
        children = {e[1] for e in g.edges}

        # all(...) is required here: asserting a non-empty list is always
        # truthy, so the original assertions could never fail.
        assert all(e not in g.edges for e in tabu_e)
        assert all(p not in parents for p in tabu_p)
        assert all(c not in children for c in tabu_c)

Beispiel #12

0

Datei anzeigen

    def test_non_numeric_data_raises_error(self):
        """A data frame holding non-numeric values must be rejected with a ValueError."""

        text_frame = pd.DataFrame(data=["x"], columns=["a"])
        numeric_only = "All columns must have numeric data.*"
        with pytest.raises(ValueError, match=numeric_only):
            from_pandas(text_frame)

Beispiel #13

0

Datei anzeigen

Datei: causal_model.py Projekt: rahlk/CADET

 def learn_notears(self, df, tabu_edges, thres):
     """Learn a structure model from ``df`` via ``from_pandas``.

     ``tabu_edges`` is forwarded unchanged and ``thres`` is passed as
     ``w_threshold``. Returns the learned model together with its edges.
     """
     structure = from_pandas(df, w_threshold=thres, tabu_edges=tabu_edges)
     edges = structure.edges
     return structure, edges

Beispiel #14

0

Datei anzeigen

    def test_expected_structure_learned(self, train_data_idx, train_model):
        """On a small, hand-checkable data set the learned edges must equal the reference model's edges."""

        learned_edges = set(from_pandas(train_data_idx, w_threshold=0.3).edges)
        expected_edges = set(train_model.edges)
        assert learned_edges == expected_edges

Beispiel #15

0

Datei anzeigen

    def test_isolated_nodes_exist(self, train_data_idx):
        """Every column must remain a node when learning with w_threshold=1.0."""

        learned = from_pandas(train_data_idx, w_threshold=1.0)
        node_count = len(learned.nodes)
        column_count = len(train_data_idx.columns)
        assert node_count == column_count

Beispiel #16

0

Datei anzeigen

    def test_all_columns_in_structure(self, train_data_idx):
        """The learned structure must contain exactly as many nodes as the data has columns."""

        learned = from_pandas(train_data_idx)
        expected_nodes = len(train_data_idx.columns)
        assert len(learned.nodes) == expected_nodes

Beispiel #17

0

Datei anzeigen

    def test_sparsity_against_without_reg(self, train_data_idx):
        """Compare the edge counts learned with and without lasso regularisation.

        NOTE(review): the assertion requires the lasso graph to have MORE edges
        than the plain one, while the test name suggests it should be sparser -
        confirm which direction is intended.
        """

        regularised = from_pandas_lasso(train_data_idx, 0.1, w_threshold=0.3)
        plain = from_pandas(train_data_idx, w_threshold=0.3)

        regularised_edge_count = len(regularised.edges)
        plain_edge_count = len(plain.edges)
        assert regularised_edge_count > plain_edge_count

Beispiel #18

0

Datei anzeigen

Datei: conftest.py Projekt: shaya7/causalnex

def bn(train_data_idx, train_data_discrete) -> BayesianNetwork:
    """Learn a structure, wrap it in a BayesianNetwork, and fit node states and CPDs."""
    structure = from_pandas(train_data_idx, w_threshold=0.3)
    network = BayesianNetwork(structure)
    return network.fit_node_states_and_cpds(train_data_discrete)

Beispiel #19

0

Datei anzeigen

    def test_tabu_expected_child_nodes(self, train_data_idx):
        """Tabu child nodes should not have any ingoing edges"""

        tabu_c = ["a", "d", "b"]
        g = from_pandas(train_data_idx, tabu_child_nodes=tabu_c)

        # Targets of all learned edges, collected once.
        children = {e[1] for e in g.edges}

        # all(...) is required here: asserting a non-empty list is always
        # truthy, so the original assertion could never fail.
        assert all(c not in children for c in tabu_c)

Beispiel #20

0

Datei anzeigen

    def test_tabu_expected_parent_nodes(self, train_data_idx):
        """Tabu parent nodes should not have any outgoing edges"""

        tabu_p = ["a", "d", "b"]
        g = from_pandas(train_data_idx, tabu_parent_nodes=tabu_p)

        # Sources of all learned edges, collected once.
        parents = {e[0] for e in g.edges}

        # all(...) is required here: asserting a non-empty list is always
        # truthy, so the original assertion could never fail.
        assert all(p not in parents for p in tabu_p)

Beispiel #21

0

Datei anzeigen

    def test_tabu_expected_edges(self, train_data_idx):
        """Tabu edges should not exist in the network"""

        tabu_e = [("d", "a"), ("b", "c")]
        g = from_pandas(train_data_idx, tabu_edges=tabu_e)

        # all(...) is required here: asserting a non-empty list is always
        # truthy, so the original assertion could never fail.
        assert all(e not in g.edges for e in tabu_e)

Beispiel #22

0

Datei anzeigen

Datei: conftest.py Projekt: quantumblacklabs/causalnex

def bn(train_data_idx, train_data_discrete) -> BayesianNetwork:
    """Learn the structure from the indexed data, then fit node states and CPDs on the discrete data."""
    learned_structure = from_pandas(train_data_idx, w_threshold=0.3)
    network = BayesianNetwork(learned_structure)
    return network.fit_node_states_and_cpds(train_data_discrete)

Beispiel #23

0

Datei anzeigen

Datei: CarDemo_LearnedGraph.py Projekt: statisticallyfit/PythonProbabilisticGraphicalModels

# from src.utils.Clock import *
def clock(startTime, endTime):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Both parts are truncated with ``int``; returns ``(minutes, seconds)``.
    """
    wholeMinutes = int((endTime - startTime) / 60)
    leftoverSeconds = int(endTime - startTime - (wholeMinutes * 60))
    return wholeMinutes, leftoverSeconds


# %% codecell
# Learn the structure from labelEncData with from_pandas and report the elapsed time.
from causalnex.structure.notears import from_pandas
import time

startTime: float = time.time()

# labelEncData is not defined in this excerpt; it must exist before this cell runs.
carStructLearned = from_pandas(X=labelEncData)

# clock() returns (minutes, seconds), so the tuple is printed as-is.
print(f"Time taken = {clock(startTime = startTime, endTime = time.time())}")

# %% codecell
from IPython.display import Image
from causalnex.plots import plot_structure, NODE_STYLE, EDGE_STYLE

# Now visualize it:
viz = plot_structure(carStructLearned,
                     graph_attributes={"scale": "0.5"},
                     all_node_attributes=NODE_STYLE.WEAK,
                     all_edge_attributes=EDGE_STYLE.WEAK)
# curPath is not defined in this excerpt; it is concatenated verbatim, so it
# presumably ends with a path separator - TODO confirm.
filename_carLearned = curPath + "car_learnedStructure.png"

viz.draw(filename_carLearned)

Beispiel #24

0

Datei anzeigen

Datei: fitting_the_condi_dist_of_the_Bayesian_Network.py Projekt: LotusSmile/causalnex_tutorial

G3_map = {0: "Fail", 1: "Pass"}

# Map each discretised column through its lookup table.
discretised_data["absences"] = discretised_data["absences"].map(absences_map)
discretised_data["G1"] = discretised_data["G1"].map(G1_map)
discretised_data["G2"] = discretised_data["G2"].map(G2_map)
discretised_data["G3"] = discretised_data["G3"].map(G3_map)


# Split the data
train, test = train_test_split(discretised_data, train_size=0.9, test_size=0.1, random_state=7)


# Learn the structure model (takes 2-3 minutes)
start = time.time()

# Learn the structure once, with the tabu edge and threshold applied up front.
# The earlier unconstrained from_pandas(structure_data) run followed by
# remove_edges_below_threshold(0.8) was dropped: its result was overwritten by
# this assignment without ever being read, so it only added learning time.
sm = from_pandas(structure_data, tabu_edges=[("higher", "Medu")], w_threshold=0.8)

# Manual corrections to the learned edges.
sm.add_edge("failures", "G1")
sm.remove_edge("Pstatus", "G1")
sm.remove_edge("address", "G1")

sm = sm.get_largest_subgraph()

# Elapsed seconds for structure learning and the edits above.
end = time.time() - start
print(int(end))


# Declare the Bayesian network model
bn = BayesianNetwork(sm)

Beispiel #25

0

Datei anzeigen

Datei: first_model.py Projekt: lusuelves/CreditRisk

#First causal nex model

from causalnex.structure import StructureModel
from causalnex.plots import plot_structure
import pandas as pd
from causalnex.structure.notears import from_pandas
from causalnex.network import BayesianNetwork
from sklearn.model_selection import train_test_split

# Load the cleaned data, coerce every column to numeric (unparseable values
# become NaN) and drop the leftover index column written by a previous export.
data = (
    pd.read_csv('../data/hmeq_clean.csv', delimiter=',')
    .apply(pd.to_numeric, errors='coerce')
    .drop(columns=['Unnamed: 0'])
)

# Learn the structure model from the prepared frame.
sm = from_pandas(data)


def determine_structure():
    """Plot the module-level ``sm``, prune weak edges, and apply manual edge corrections.

    Mutates the global structure model ``sm`` in place. No return statement is
    visible in this excerpt.
    """

    # Plot the structure as learned; the three returned values are discarded.
    _, _, _ = plot_structure(sm)

    # Drop edges below the 0.8 threshold, then plot the pruned structure.
    sm.remove_edges_below_threshold(0.8)
    _, _, _ = plot_structure(sm)
    """
    Now I have to determine what relationships are right.
    I can see that BAD determines VALUE and MORTDUE when it should be the other way
    round. SO I am going to change the arrows. 
    """
    # Remove the three edges leaving BAD; only MORTDUE -> BAD is added back here.
    sm.remove_edge("BAD", "VALUE")
    sm.remove_edge("BAD", "MORTDUE")
    sm.remove_edge("BAD", "LOAN")
    sm.add_edge("MORTDUE", "BAD")

Beispiel #26

0

Datei anzeigen

import os

genotypes = pd.concat(cultivar, axis=1)
# Rename the two columns on the returned frame: set_axis(..., inplace=True)
# was removed in pandas 2.0.
genotype_uniq = genotypes.drop_duplicates().set_axis(['genotype', 'encoding'], axis=1)
genotype_map = dict(zip(genotype_uniq.genotype, genotype_uniq.encoding))

# hardcoded seasons as dict
season_map = dict({'season_4': 0, 'season_6': 1})

# open() does not expand "~", so resolve the home directory explicitly;
# otherwise the files would be looked up under a literal "~" directory.
bbn_dir = os.path.expanduser("~/work/phenophasebbn/bbn")

with open(os.path.join(bbn_dir, "genotype_map.json"), "w") as outfile:
    json.dump(genotype_map, outfile)
with open(os.path.join(bbn_dir, "season_map.json"), "w") as outfile:
    json.dump(season_map, outfile)

# learn structure with NOTEARS, over 1000 iterations,and keep edge weights > 0.95
from causalnex.structure.notears import from_pandas
sm = from_pandas(X=dum_df, max_iter=1000, w_threshold=0.95)
# pickle the structure model
import pickle
# Write the pickle in binary mode; the context manager closes the file even
# if pickle.dump raises.
with open(os.path.join(bbn_dir, "nt_sm"), "wb") as smp:
    pickle.dump(sm, smp)

# output plot of learned graph
# no need to apply thresholding, since this is taken care of in the sm with w_threshold
from causalnex.plots import plot_structure
viz = plot_structure(sm)
viz.draw("sm_plot.png")

Beispiel #27

0

Datei anzeigen

Datei: bbn_structure.py Projekt: rbartelme/phenophasebbn

print("Finished writing metadata for encoding categoricals...")

print("Begin embedding expert knowledge into DAG...")

# learn structure with NOTEARS, over 10 iterations, and keep edge weights > 0.95
#device = torch.cuda.is_available()
#print('GPU is available:', device)

print(
    "Attempting NO TEARS DAG structure learning with tabu edges and child noodes..."
)
from causalnex.structure.notears import from_pandas

# bl_tup is forwarded as the tabu edge list, and "season" is the only tabu
# child node.
learned_sm = from_pandas(X=dum_df,
                         max_iter=10,
                         w_threshold=0.95,
                         tabu_edges=bl_tup,
                         tabu_child_nodes=["season"])

print("Finished structure learning...begin pickling structure model.")
# Pickle the learned structure model in binary mode; the context manager
# closes the file even if pickle.dump raises.
with open("/work/phenophasebbn/bbn/notears_sm.pickle", "wb") as smp:
    pickle.dump(learned_sm, smp)

#print("Generating image of final DAG...")
#output plot of learned graph
# no need to apply thresholding, since this is taken care of in the sm with w_threshold

Beispiel #28

0

Datei anzeigen

# Sanity check: fit_transform on testMultivals must yield the codes 0..7 in order.
assert list(
    labelEncoder.fit_transform(y=testMultivals)) == [0, 1, 2, 3, 4, 5, 6, 7]

# %% markdown [markdown]
# Now apply the NOTEARS algo to learn the structure:

# %% codecell

# Star import; the clock() call below presumably comes from here - TODO confirm.
from src.utils.Clock import *

from causalnex.structure.notears import from_pandas
import time

startTime: float = time.time()

# labelEncData is not defined in this excerpt; it must exist before this cell runs.
structureModelLearned = from_pandas(X=labelEncData)

print(f"Time taken = {clock(startTime = startTime, endTime = time.time())}")

# %% codecell
# Now visualize it:
# plot_structure, NODE_STYLE, EDGE_STYLE and Image are not imported in this
# excerpt; they must already be in scope.
viz = plot_structure(structureModelLearned,
                     graph_attributes={"scale": "0.5"},
                     all_node_attributes=NODE_STYLE.WEAK,
                     all_edge_attributes=EDGE_STYLE.WEAK)
# curPath is concatenated verbatim, so it presumably ends with a path
# separator - TODO confirm.
filename_learned = curPath + "structure_model_learnedStructure.png"

# Write the image to disk, then load it for inline display.
viz.draw(filename_learned)
Image(filename_learned)

# %% markdown [markdown]