def visualize(self, nodes, edges, fname):
    """Draw the causal model given by ``nodes`` and ``edges`` with graphviz.

    The drawing is rendered through ``render(filename=fname)``. When building
    or drawing the model raises an ``AssertionError``, an error message and
    the offending edges are printed instead of propagating the exception.
    """
    from causalgraphicalmodels import CausalGraphicalModel
    import graphviz

    try:
        model = CausalGraphicalModel(nodes=nodes, edges=edges)
        drawing = model.draw()
        drawing.render(filename=fname)
    except AssertionError:
        print("[ERROR]: cycles in NOTEARS dag")
        print("Edges: {0}".format(edges))
def get_causal_graph_from_nxgraph(nx_graph):
    """
    Build a CausalGraphicalModel mirroring the given graph.

    Nodes and edges are taken from ``nx_graph.nodes()`` and
    ``nx_graph.edges()`` respectively.
    """
    graph_nodes = nx_graph.nodes()
    graph_edges = nx_graph.edges()
    return CausalGraphicalModel(nodes=graph_nodes, edges=graph_edges)
def get_causal_graph_from_bif(bif_reader):
    """
    Build a CausalGraphicalModel from a BIF reader.

    Nodes come from ``bif_reader.get_variables()`` and edges from
    ``bif_reader.get_edges()``.
    """
    variables = bif_reader.get_variables()
    reader_edges = bif_reader.get_edges()
    return CausalGraphicalModel(nodes=variables, edges=reader_edges)
def get_graphical_network_model_from_dict(network_spec):
    """Build the CausalGraphicalModel implied by a network specification.

    Args:
        network_spec: mapping providing ``"variable_names"`` (the node names)
            and ``"dependencies"`` (an iterable of ``(end_node, expression)``
            pairs).

    Returns:
        A ``CausalGraphicalModel`` whose nodes are the variable names and
        which has one edge ``(variable_name, end_node)`` for every variable
        name found in the expression of a dependency. Duplicate edges are
        dropped; first-seen order is kept.
    """
    variable_names = network_spec["variable_names"]
    edges = []
    for end_node, expression in network_spec["dependencies"]:
        # Pair each parent with the whole end-node name. The previous
        # itertools.product(start_nodes, end_node) iterated over the
        # *characters* of end_node, so multi-character names produced edges
        # to single letters.
        # NOTE(review): `in` is a plain containment test; when expression is
        # a string, a variable whose name is a substring of another token in
        # the expression is also picked up - confirm this is acceptable.
        edges.extend(
            (variable_name, end_node)
            for variable_name in variable_names
            if variable_name in expression
        )
    # Order-preserving de-duplication keeps the edge list deterministic.
    edges = list(dict.fromkeys(edges))
    return CausalGraphicalModel(nodes=variable_names, edges=edges)
def show(self):
    """Rebuild the DAG from ``self.nodes`` and render it with daft.

    Each entry of ``self.nodes`` contributes its key as a node, one edge
    ``(key, target)`` per item in its ``'edges'`` and its ``'coordinate'``
    as drawing position. The resulting CausalGraphicalModel is stored on
    ``self.dag.gm`` and then drawn.
    """
    self.dag = self.create_dag()
    dag = self.dag

    for name, spec in self.nodes.items():
        dag.nodes.add(name)
        for target in spec['edges']:
            dag.edges.add((name, target))
        dag.coordinates[name] = spec['coordinate']

    dag.gm = CausalGraphicalModel(nodes=dag.nodes, edges=dag.edges)
    dag.pgm = daft.PGM()

    # NOTE(review): the PGM stored on dag.pgm above stays empty; a second,
    # separate PGM is the one that gets populated and rendered below.
    figure = daft.PGM()
    model_dag = dag.gm.dag
    for name in model_dag.nodes:
        figure.add_node(name, name, *dag.coordinates[name])
    for link in model_dag.edges:
        figure.add_edge(*link)
    figure.render()
    plt.gca()  #.invert_yaxis()
def get_causal_graph_with_latent_edges(nx_graph, incoming_latent_edges):
    """
    Build a CausalGraphicalModel from a graph plus unobserved latent edges.

    A latent edge is kept when at least one of its two endpoints is a node
    of ``nx_graph``; edges with neither endpoint in the graph are dropped.
    """
    kept_edges = []
    for candidate in incoming_latent_edges:
        first, second = candidate[0], candidate[1]
        # Skip edges that touch none of the graph's nodes.
        if first not in nx_graph.nodes() and second not in nx_graph.nodes():
            continue
        kept_edges.append(candidate)

    return CausalGraphicalModel(
        nodes=nx_graph.nodes(),
        edges=nx_graph.edges(),
        latent_edges=kept_edges,
    )
def create_network_from_file(file_name):
    """Simulate a data set and its causal graph from a network description file.

    The file is consumed line by line, in this order:

    1. ``random_seed: <int>`` or ``random_seed: no_seed`` (falls back to 42);
       the value seeds both ``random`` and ``np.random``.
    2. ``variable_names: <names separated by single spaces>``.
    3. ``number_of_samples: <int>``.
    4. ``normal_noise: <a> <b>``; the two floats are passed as the first two
       arguments of ``np.random.normal`` to fill every column with noise.
    5. ``number_of_generating_functions: <k>``.
    6. ``k`` lines ``<variable> <min> <max>``; that column is overwritten
       with ``np.random.uniform(min, max)`` draws.
    7. Every remaining non-blank line ``<end_node> = <expression>``; the
       expression is evaluated with ``DataFrame.eval`` and added to the
       ``end_node`` column, in file order.

    Args:
        file_name: path of the description file.

    Returns:
        tuple: ``(df, Bayesian_network, noise_and_generating)`` - the
        simulated DataFrame, the ``CausalGraphicalModel`` built from the
        dependencies, and a copy of the DataFrame taken before any
        dependency was applied.
    """
    with open(file_name) as f:
        seed_field = f.readline().strip().split("random_seed: ")[1]
        random_seed = 42 if seed_field == "no_seed" else int(seed_field)
        random.seed(random_seed)
        np.random.seed(random_seed)

        variable_names = f.readline().strip().split(
            "variable_names: ")[1].split(" ")
        nsamp = int(f.readline().strip().split("number_of_samples: ")[1])
        noise_mean, noise_variance = map(
            float,
            f.readline().strip().split("normal_noise: ")[1].split(" "))
        # NOTE(review): np.random.normal takes a scale (standard deviation)
        # as its second argument, so `noise_variance` is used as a scale.
        df = pd.DataFrame(
            np.random.normal(noise_mean, noise_variance,
                             size=(nsamp, len(variable_names))),
            columns=variable_names)

        number_of_generating_functions = int(
            f.readline().strip().split("number_of_generating_functions: ")[1])
        for _ in range(number_of_generating_functions):
            var_name, min_v, max_v = f.readline().strip().split(" ")
            df[var_name] = np.random.uniform(float(min_v), float(max_v), nsamp)

        rest_of_dependencies = [line.strip() for line in f.readlines()]

    edges = []
    # Snapshot before dependencies are applied: noise + generating functions.
    noise_and_generating = df.copy()
    for dep in rest_of_dependencies:
        if not dep:
            # Blank lines used to crash the unpacking below.
            continue
        end_node, expression = dep.split(" = ", 1)
        # Pair each parent with the whole end-node name. The previous
        # itertools.product(start_nodes, end_node) iterated over the
        # characters of end_node, breaking multi-character node names.
        # NOTE(review): substring matching also picks up a variable whose
        # name is contained in another token of the expression.
        edges.extend(
            (variable_name, end_node)
            for variable_name in variable_names
            if variable_name in expression
        )
        df[end_node] += df.eval(expression)

    # Order-preserving de-duplication, matching the dict-based builders.
    edges = list(dict.fromkeys(edges))
    Bayesian_network = CausalGraphicalModel(nodes=variable_names, edges=edges)
    return df, Bayesian_network, noise_and_generating
def create_network_from_dict(network_spec):
    """Simulate a data set and its causal graph from a specification dict.

    Keys read from ``network_spec``:
        random_seed: seed for both ``random`` and ``np.random``.
        variable_names: list of column / node names.
        noise_mean, variance: first two arguments of ``np.random.normal``
            used to fill every column with noise.
        nsamp: number of rows to simulate.
        generating_functions: list of ``(var_name, min_v, max_v)`` triples;
            each named column is overwritten with uniform draws.
        dependencies: list of ``(end_node, expression)`` pairs; each
            expression is evaluated with ``DataFrame.eval`` and added to the
            ``end_node`` column, in list order.

    Returns:
        tuple: ``(df, Bayesian_network, noise_and_generating)`` - the
        simulated DataFrame, the ``CausalGraphicalModel`` built from the
        dependencies, and a copy of the DataFrame taken before any
        dependency was applied.
    """
    variable_names = network_spec["variable_names"]
    nsamp = network_spec["nsamp"]

    random.seed(network_spec["random_seed"])
    np.random.seed(network_spec["random_seed"])

    # NOTE(review): np.random.normal takes a scale (standard deviation) as
    # its second argument, so the "variance" entry is used as a scale.
    df = pd.DataFrame(
        np.random.normal(network_spec["noise_mean"],
                         network_spec["variance"],
                         size=(nsamp, len(variable_names))),
        columns=variable_names)

    for var_name, min_v, max_v in network_spec["generating_functions"]:
        df[var_name] = np.random.uniform(float(min_v), float(max_v), nsamp)

    edges = []
    # Snapshot before dependencies are applied: noise + generating functions.
    noise_and_generating = df.copy()
    for end_node, expression in network_spec["dependencies"]:
        # Pair each parent with the whole end-node name. The previous
        # itertools.product(start_nodes, end_node) iterated over the
        # characters of end_node, breaking multi-character node names.
        # NOTE(review): substring matching also picks up a variable whose
        # name is contained in another token of the expression.
        edges.extend(
            (variable_name, end_node)
            for variable_name in variable_names
            if variable_name in expression
        )
        df[end_node] += df.eval(expression)

    # Order-preserving de-duplication keeps the edge list deterministic.
    edges = list(dict.fromkeys(edges))
    Bayesian_network = CausalGraphicalModel(nodes=variable_names, edges=edges)
    return df, Bayesian_network, noise_and_generating
pl.plot(A, D, color='k', alpha=0.1) pl.plot(A, D, color='k', alpha=0.1) pl.plot(A, D, color='k', alpha=0.1) d['M'] = scale(d.Marriage.values, with_std=False) with pm.Model() as m5_2: a = pm.Normal('a', 0, 0.2) bM = pm.Normal('bM', 0, 0.5) mu = a + bM * d.M sigma = pm.Exponential('sigma', lam=1) D = pm.Normal('D', mu, sigma, observed=d.D) div_dag = CausalGraphicalModel(nodes=['A', 'M', 'D'], edges=[('A', 'M'), ('A', 'D'), ('M', 'D')]) div_dag2 = CausalGraphicalModel(nodes=['A', 'M', 'D'], edges=[('A', 'M'), ('A', 'D')]) with m5_1: trace_5_1 = pm.sample(1000, tune=1000) with m5_2: trace_5_2 = pm.sample(1000, tune=1000) pm.forestplot(trace_5_1, varnames=['a', 'bA', 'sigma']) pm.forestplot(trace_5_2, varnames=['a', 'bM', 'sigma']) # The above is consistent with two causal DAGS: div_dag.draw() # and div_dag2.draw() """ We need a model that CONTROLS FOR A while assessing the association between M and D
# Causal graph in which every other node has a direct edge into
# "Mode_Choice", plus a few edges among the covariates themselves.
causal_graph = CausalGraphicalModel(
    nodes=[
        "Travel_Time",
        "Travel_Distance",
        "Travel_Cost",
        "Cross_Bay_Bridge",
        "HH_Size",
        "num_of_kids_household",
        "Autos_per_licensed_drivers",
        "Gender",
        "Mode_Choice",
    ],
    edges=[
        # Edges from each covariate into the outcome node.
        ("Travel_Time", "Mode_Choice"),
        ("Travel_Distance", "Mode_Choice"),
        ("Travel_Cost", "Mode_Choice"),
        ("Cross_Bay_Bridge", "Mode_Choice"),
        ("HH_Size", "Mode_Choice"),
        ("num_of_kids_household", "Mode_Choice"),
        ("Autos_per_licensed_drivers", "Mode_Choice"),
        ("Gender", "Mode_Choice"),
        # Edges between covariates: travel distance feeds time, cost and the
        # bridge indicator; household size feeds travel distance.
        ("Travel_Distance", "Travel_Time"),
        ("Travel_Distance", "Travel_Cost"),
        ("Travel_Distance", "Cross_Bay_Bridge"),
        ("HH_Size", "Travel_Distance"),
        # ("Travel_Time", "Mode_Choice"),
        # ("Travel_Time", "Mode_Choice"),
    ],
)
"info") simulator.registerNormalSampler(7000, 1000) simulator.registerNormalSampler(7000, 1000) simulator.registerGammaSampler(1.0, .05) simulator.registerDiscreteRejectSampler(0.0, 0.20, 0.04, 25, 30, 18, 10, 5, 2) simulator.run() elif op == "grmo": bo = CausalGraphicalModel(nodes=[ "dem", "prevDem", "partMarg", "prodDownTm", "partOrd", "boPartOrd", "prCap", "boPrCap", "bo", "profit" ], edges=[("dem", "boPartOrd"), ("prevDem", "partOrd"), ("partMarg", "partOrd"), ("partOrd", "boPartOrd"), ("prodDownTm", "prCap"), ("prCap", "boPrCap"), ("dem", "boPrCap"), ("boPartOrd", "bo"), ("boPrCap", "bo")]) bo.draw() plt.show() elif op == "train": prFile = sys.argv[2] regressor = FeedForwardNetwork(prFile) regressor.buildModel() FeedForwardNetwork.batchTrain(regressor)
"Cross Bay Trip", "Number of Licensed Drivers", "Number of Autos", "Utility (Drive Alone)", ] EDGES_IND = [ ("Total Travel Distance", "Utility (Drive Alone)"), ("Total Travel Time", "Utility (Drive Alone)"), ("Total Travel Cost", "Utility (Drive Alone)"), ("Cross Bay Trip", "Utility (Drive Alone)"), ("Number of Licensed Drivers", "Utility (Drive Alone)"), ("Number of Autos", "Utility (Drive Alone)"), ] IND_UTILITY = CausalGraphicalModel(NODES_IND, EDGES_IND) # Drive Alone NODES_DA = [ "Total Travel Distance", "Total Travel Time", "Total Travel Cost", "Cross Bay Trip", "Number of Autos", "Number of Licensed Drivers", "Utility (Drive Alone)", ] EDGES_DA = [ ("Total Travel Distance", "Total Travel Time"), ("Total Travel Distance", "Total Travel Cost"),
# - # ## Assumed causal graph # + drive_alone_graph = CausalGraphicalModel( nodes=[ 'total_travel_time', 'total_travel_cost', 'total_travel_distance', #'household_income', 'household_size', 'num_cars', 'cross_bay', 'utility_driving' ], edges=[ ("total_travel_time", "utility_driving"), ("total_travel_cost", "utility_driving"), ("total_travel_distance", "utility_driving"), ("household_size", "utility_driving"), # ("household_income", "utility_driving"), ("num_cars", "utility_driving"), ("cross_bay", "utility_driving"), ("total_travel_distance", "total_travel_time"), ("total_travel_distance", "total_travel_cost"), ]) # draw return a graphviz `dot` object, which jupyter can render drive_alone_graph.draw() # -
parking_congestion, parking_capacity, previous_arrivals, explanatory_features, utility, choice, ] edges = [ (parking_duration, parking_cost), (vehicle_type, parking_fines), (parking_tariffs, parking_cost), (parking_fines, parking_cost), (parking_duration, parking_fines), (vehicle_type, choice_set), (choice_set, choice), (parking_cost, explanatory_features), (explanatory_features, utility), (utility, choice), (vehicle_attributes, explanatory_features), (parking_congestion, explanatory_features), (parking_capacity, parking_congestion), (previous_arrivals, parking_congestion), ] latent_edges = [(parking_congestion, utility)] PARKING_CAUSAL_MODEL = CausalGraphicalModel( nodes=nodes, edges=edges, latent_edges=latent_edges )
""" import numpy as np import pandas as pd import pymc3 as pm import arviz as ar from sklearn.preprocessing import scale import matplotlib.pyplot as pl from causalgraphicalmodels import CausalGraphicalModel from cmocean.cm import balance_r from seaborn import heatmap """ Example: infer direct influence of both parents (P) and grand parents (G) on the educational achievement of children (C). """ dag_ed1 = CausalGraphicalModel(nodes=['P', 'G', 'C'], edges=[('G', 'P'), ('G', 'C'), ('P', 'C')]) dag_ed1.draw() """ But we suppose ther are unmeasured, common influences on parents and their children (e.g. neighborhoods, not shared by grandparent who live elsewhere). """ dag_ed2 = CausalGraphicalModel(nodes=['G', 'P', 'C', 'U'], edges=[('G', 'P'), ('U', 'P'), ('G', 'C'), ('P', 'C'), ('U', 'C')]) dag_ed2.draw() """ The DAG above implies that: (1) P is some function of G and U (2) C is some function of G, P, and U (3) G and U are not functions of any other known variables.
# -*- coding: utf-8 -*- """ A graphical model to demonstrate conditional independence. Shows a graph that assumes travel cost is conditionally independent of travel time, conditional on travel distance. """ from causalgraphicalmodels import CausalGraphicalModel TRAVEL_TIME = "Travel Time" TRAVEL_COST = "Travel Cost" TRAVEL_DISTANCE = "Travel Distance" EXAMPLE_GRAPH = CausalGraphicalModel( nodes=[TRAVEL_TIME, TRAVEL_COST, TRAVEL_DISTANCE], edges=[(TRAVEL_DISTANCE, TRAVEL_TIME), (TRAVEL_DISTANCE, TRAVEL_COST)], )
import daft
import matplotlib.pyplot as plt
from causalgraphicalmodels import CausalGraphicalModel

# DAG over H0, H1, F and T with edges H0 -> H1, F -> H1 and T -> F.
plant_dag = CausalGraphicalModel(nodes=["H0", "H1", "F", "T"],
                                 edges=[("H0", "H1"), ("F", "H1"), ("T", "F")])

# Re-draw the same DAG with daft, placing each node at a hand-picked
# coordinate pair.
pgm = daft.PGM()
coordinates = {"H0": (0, 0), "T": (4, 0), "F": (3, 0), "H1": (2, 0)}
for node in plant_dag.dag.nodes:
    # The node name is passed twice, followed by its unpacked coordinates.
    pgm.add_node(node, node, *coordinates[node])
for edge in plant_dag.dag.edges:
    pgm.add_edge(*edge)
pgm.render()
# Flip the y axis of the current axes before showing the figure.
plt.gca().invert_yaxis()
plt.show()
# -*- coding: utf-8 -*- """ Created on Mon Jun 29 09:55:00 2020 @author: Mhuth """ from causalgraphicalmodels import CausalGraphicalModel nod = [ 'region', 'oral', 'sales bans', 'time', 'ideas about children', 'controls' ] ed = [('region', 'oral'), ('region', 'sales bans'), ('region', 'ideas about children'), ('sales bans', 'oral'), ('time', 'oral'), ('time', 'ideas about children'), ('time', 'controls'), ('ideas about children', 'oral')] cg1 = CausalGraphicalModel(nodes=nod, edges=ed) cg1.draw()
# + SEED = 10 np.random.seed(SEED) tf.random.set_random_seed(SEED) data_generation_graph = CausalGraphicalModel( nodes=['a', 'b', 'c', 'confounder', 'd', 'e', 'f', 'y'], edges=[ ("a", "y"), ("b", "y"), ("c", "y"), ("d", "y"), ("e", "y"), ("f", "y"), ("confounder", "y"), ("confounder", "a"), ("confounder", "b"), # ("confounder", "c"), ("confounder", "d"), ("confounder", "e"), ("confounder", "f"), # ("confounder", "y"), # ("confounder", "y"), ]) # draw return a graphviz `dot` object, which jupyter can render data_generation_graph.draw() # - # ## Specifiy the paramteric relationship between the covariates.
pm.forestplot([trace_7, trace_6, trace_5], models=['m7', 'm6', 'm5'], varnames=['bM', 'bN'], rhat=False, alpha=0.11); pd.plotting.scatter_matrix(dcc[['M', 'N', 'K']]); """ The regression model (m5_7) asks if high N is associated with high K. Likewise m5_7 asks whether high M implies high K. Bigger species like apes (big M) have milk with less energy. But spp with more neocortex (big N) have richer milk (big K). The fact that M and N are correlated makes these relationships difficult to see unless both factors are accounted for. ----o---- Simulating a Masking Relationship. Two predictors (M, N) are correlated with one another, and one (M) is positively correlated with the target (K) while the other (N) is negatively correlated with K """ div_msk = CausalGraphicalModel(nodes=['M', 'N', 'K'], edges=[('M', 'K'), ('N', 'K'), ('M', 'N')]) div_msk.draw() n = 100 M = np.random.normal(size=n) N = np.random.normal(loc=M, size=n) K = np.random.normal(loc=N-M, size=n) d_sim = pd.DataFrame(dict(K=K, M=M, N=N)) pd.plotting.scatter_matrix(d_sim, alpha=0.5, diagonal=); with pm.Model() as m5_sim: a = pm.Normal('a', 0, 0.2) bN = pm.Normal('bN', 0, 0.5) σ = pm.Exponential('sigma', 1)
""" Treatment appears to have negligible effect even though βF posterior indicates fungus impacts growth. The problem is that fungus is a consequence of treatment; i.e. fungus is a post-treatment variable. The model asked the question "Once we know fungus is present does treatment matter?" ⇒ No. The next model ignores the fungus variable """
# Model m8: h1 is Normal with mean h0 * (α + βT * treatment) and standard
# deviation σ; no fungus term appears in the model.
with pm.Model() as m8:
    σ = pm.Exponential('σ', 1)
    α = pm.Lognormal('α', 0, 0.2)
    βT = pm.Normal('βT', 0, 0.5)
    # Growth proportion as a linear function of the treatment column.
    p = α + βT * d.treatment.values
    μ = d.h0.values * p
    h1 = pm.Normal('h1', mu=μ, sd=σ, observed=d.h1.values)
    trc8 = pm.sample(tune=1000)
pm.summary(trc8)
""" Now the treatment effect is plain to see. Note that: 1. It makes sense to control for pre-treatment differences such as initial height, h0, here. 2. Including post-treatment variables can mask the treatment itself. 3. Note that model m7 is still useful to identify the causal mechanism! """
# DAG with edges H0 -> H1, T -> F and F -> H1 (T reaches H1 only through F).
plant_dag = CausalGraphicalModel(nodes=['H0', 'H1', 'T', 'F'],
                                 edges=[('H0', 'H1'), ('T', 'F'), ('F', 'H1')])
plant_dag.draw()
# The results of the queries below are not stored (notebook-style
# expressions). NOTE(review): the third argument of is_d_separated is
# presumably the conditioning set - confirm against the library docs.
plant_dag.is_d_separated('T', 'H1')
plant_dag.is_d_separated('T', 'H1', 'F')
plant_dag.get_all_independence_relationships()
# Compute conditional independencies from a directed graphical model # Uses this library # https://github.com/ijmbarr/causalgraphicalmodels # Code is based on # https://fehiepsi.github.io/rethinking-numpyro/06-the-haunted-dag-and-the-causal-terror.html from causalgraphicalmodels import CausalGraphicalModel dag = CausalGraphicalModel( nodes=["X", "Y", "C", "U", "B", "A"], edges=[ ("X", "Y"), ("U", "X"), ("A", "U"), ("A", "C"), ("C", "Y"), ("U", "B"), ("C", "B"), ], ) all_independencies = dag.get_all_independence_relationships() print(all_independencies) print('\n')
strong-ignorability (`DRIVE_ALONE_UTILITY`) and under the assumptions of unobserved confounding (`LATENT_DRIVE_ALONE_UTILITY`) from "The Blessings of Multiple Causes" (2018) by Wang and Blei. """ from causalgraphicalmodels import CausalGraphicalModel DRIVE_ALONE_UTILITY = CausalGraphicalModel( nodes=[ "Total Travel Distance", "Total Travel Time", "Total Travel Cost", "Number of Autos", "Number of Licensed Drivers", "Utility (Drive Alone)", ], edges=[ ("Total Travel Distance", "Total Travel Time"), ("Total Travel Distance", "Total Travel Cost"), ("Total Travel Distance", "Utility (Drive Alone)"), ("Total Travel Time", "Utility (Drive Alone)"), ("Total Travel Cost", "Utility (Drive Alone)"), ("Number of Autos", "Utility (Drive Alone)"), ("Number of Licensed Drivers", "Utility (Drive Alone)"), ], ) drive_alone_nodes = list(DRIVE_ALONE_UTILITY.dag.nodes) nodes_for_latent_graph = ["confounder"] + drive_alone_nodes edges_for_latent_graph = [("confounder", x) for x in drive_alone_nodes] + [ (x, "Utility (Drive Alone)") for x in drive_alone_nodes[:-1] ]