Ejemplo n.º 1
0
 def visualize(self, nodes, edges, fname):
     """Draw the causal model given by ``nodes``/``edges`` and render it to ``fname``.

     A ``CausalGraphicalModel`` is built from the inputs and its drawing is
     rendered through graphviz. An ``AssertionError`` raised while building
     or rendering is reported on stdout (together with the edges) instead
     of being propagated.
     """
     from causalgraphicalmodels import CausalGraphicalModel
     import graphviz
     try:
         model = CausalGraphicalModel(nodes=nodes, edges=edges)
         drawing = model.draw()
         drawing.render(filename=fname)
     except AssertionError:
         for message in ("[ERROR]: cycles in NOTEARS dag",
                         "Edges: {0}".format(edges)):
             print(message)
Ejemplo n.º 2
0
    def get_causal_graph_from_nxgraph(nx_graph):
        """
        Build a CausalGraphicalModel from the nodes and edges of ``nx_graph``.
        """
        graph_nodes = nx_graph.nodes()
        graph_edges = nx_graph.edges()
        return CausalGraphicalModel(nodes=graph_nodes, edges=graph_edges)
Ejemplo n.º 3
0
    def get_causal_graph_from_bif(bif_reader):
        """
        Build a CausalGraphicalModel from the variables and edges exposed by
        ``bif_reader`` (a reader for a BIF file).
        """
        variables = bif_reader.get_variables()
        bif_edges = bif_reader.get_edges()
        return CausalGraphicalModel(nodes=variables, edges=bif_edges)
Ejemplo n.º 4
0
def get_graphical_network_model_from_dict(network_spec):
    """Build the causal graph implied by a network specification.

    Args:
        network_spec: mapping that provides ``"variable_names"`` (the node
            names) and ``"dependencies"`` (an iterable of
            ``(end_node, expression)`` pairs).

    Returns:
        A ``CausalGraphicalModel`` with one edge ``(variable, end_node)`` for
        every variable name that occurs in the expression of ``end_node``.
        Duplicate edges are collapsed.
    """
    variable_names = network_spec["variable_names"]
    edges = set()
    for end_node, expression in network_spec["dependencies"]:
        # Pair every referenced variable with the *whole* end-node name.
        # (itertools.product(start_nodes, end_node) would iterate over the
        # characters of the end_node string and produce edges to single
        # letters for any multi-character node name.)
        # NOTE(review): membership is a plain substring test, so a variable
        # named "a" also matches an expression that mentions "alpha".
        edges.update(
            (start_node, end_node)
            for start_node in variable_names
            if start_node in expression
        )
    return CausalGraphicalModel(nodes=variable_names, edges=list(edges))
Ejemplo n.º 5
0
    def show(self):
        """Build the DAG from ``self.nodes`` and render it with daft.

        Every entry of ``self.nodes`` contributes its key as a node, the
        items of its ``'edges'`` value as outgoing edges, and its
        ``'coordinate'`` value as the position used when drawing the node.
        """
        self.dag = self.create_dag()
        for node_key, node_items in self.nodes.items():
            self.dag.nodes.add(node_key)
            for e in node_items['edges']:
                # edge runs from the current node to each listed target
                self.dag.edges.add((node_key, e))
            self.dag.coordinates[node_key] = (node_items['coordinate'])

        self.dag.gm = CausalGraphicalModel(nodes=self.dag.nodes,
                                           edges=self.dag.edges)
        # NOTE(review): this stored PGM is never populated; the figure that
        # gets rendered is the separate local ``pgm`` below -- confirm that
        # this is intended.
        self.dag.pgm = daft.PGM()

        pgm = daft.PGM()
        for node in self.dag.gm.dag.nodes:
            # the node name is used both as identifier and as label; the
            # stored coordinate is unpacked into the remaining positional
            # arguments (presumably x, y -- TODO confirm)
            pgm.add_node(node, node, *self.dag.coordinates[node])
        for edge in self.dag.gm.dag.edges:
            pgm.add_edge(*edge)
        pgm.render()
        # the y-axis inversion is left disabled (commented out)
        plt.gca()  #.invert_yaxis()
Ejemplo n.º 6
0
    def get_causal_graph_with_latent_edges(nx_graph, incoming_latent_edges):
        """
        Build a CausalGraphicalModel from ``nx_graph`` plus unobserved
        (latent) edges.

        A latent edge is kept only when at least one of its two endpoints
        is a node of ``nx_graph``.
        """
        kept_latent_edges = []
        for candidate in incoming_latent_edges:
            first, second = candidate[0], candidate[1]
            # drop candidates that touch no node of the graph at all
            if (first not in nx_graph.nodes()
                    and second not in nx_graph.nodes()):
                continue
            kept_latent_edges.append(candidate)

        return CausalGraphicalModel(nodes=nx_graph.nodes(),
                                    edges=nx_graph.edges(),
                                    latent_edges=kept_latent_edges)
Ejemplo n.º 7
0
def create_network_from_file(file_name):
    """Generate a synthetic dataset and its causal graph from a spec file.

    The file is read in this order:

    1. ``random_seed: <int>`` or ``random_seed: no_seed`` (falls back to 42)
    2. ``variable_names: <space-separated names>``
    3. ``number_of_samples: <int>``
    4. ``normal_noise: <mean> <spread>``
    5. ``number_of_generating_functions: <k>`` followed by ``k`` lines of
       ``<var_name> <min> <max>`` (the column is replaced by uniform draws)
    6. any number of ``<end_node> = <expression>`` dependency lines; blank
       lines are ignored

    Args:
        file_name: path of the specification file.

    Returns:
        ``(df, Bayesian_network, noise_and_generating)``: the generated data
        frame, the ``CausalGraphicalModel`` built from the dependencies, and
        a copy of the frame taken before any dependency was applied.
    """
    with open(file_name) as f:
        random_seed = f.readline().strip().split("random_seed: ")[1]
        random_seed = int(random_seed) if random_seed != "no_seed" else 42

        random.seed(random_seed)
        np.random.seed(random_seed)

        variable_names = f.readline().strip().split(
            "variable_names: ")[1].split(" ")
        nsamp = int(f.readline().strip().split("number_of_samples: ")[1])
        noise_mean, noise_variance = map(
            float,
            f.readline().strip().split("normal_noise: ")[1].split(" "))

        # NOTE(review): np.random.normal's second argument is the standard
        # deviation (scale), so ``noise_variance`` is used as a standard
        # deviation here despite its name -- confirm the intended meaning.
        df = pd.DataFrame(np.random.normal(noise_mean,
                                           noise_variance,
                                           size=(nsamp, len(variable_names))),
                          columns=variable_names)

        number_of_generating_functions = int(
            f.readline().strip().split("number_of_generating_functions: ")[1])
        for _ in range(number_of_generating_functions):
            var_name, min_v, max_v = f.readline().strip().split(" ")
            df[var_name] = np.random.uniform(float(min_v), float(max_v), nsamp)

        # snapshot of pure noise + generating functions, before dependencies
        noise_and_generating = df.copy()
        edges = []
        for dep in map(str.strip, f.readlines()):
            if not dep:
                # tolerate blank/trailing lines instead of failing the unpack
                continue
            end_node, expression = dep.split(" = ", 1)
            # Pair every referenced variable with the *whole* end-node name.
            # (itertools.product(start_nodes, end_node) would iterate over
            # the characters of the end_node string.)
            # NOTE(review): membership is a plain substring test, so a
            # variable named "a" also matches an expression with "alpha".
            edges.extend((variable_name, end_node)
                         for variable_name in variable_names
                         if variable_name in expression)
            # SECURITY: the expression comes straight from the file and is
            # evaluated by DataFrame.eval -- only use trusted spec files.
            df[end_node] += df.eval(expression)

    # collapse duplicate edges (order-preserving), as the dict variant does
    edges = list(dict.fromkeys(edges))
    Bayesian_network = CausalGraphicalModel(nodes=variable_names, edges=edges)
    return df, Bayesian_network, noise_and_generating
Ejemplo n.º 8
0
def create_network_from_dict(network_spec):
    """Generate a synthetic dataset and its causal graph from a spec dict.

    Network_spec partial description:
    variable_names - list
    generating_functions - list of triples (var_name, min, max)
    dependencies - list of pairs (end_node, expression)

    The keys ``random_seed``, ``noise_mean``, ``variance`` and ``nsamp`` are
    read as well.

    Returns:
        ``(df, Bayesian_network, noise_and_generating)``: the generated data
        frame, the ``CausalGraphicalModel`` built from the dependencies, and
        a copy of the frame taken before any dependency was applied.
    """
    random.seed(network_spec["random_seed"])
    np.random.seed(network_spec["random_seed"])

    variable_names = network_spec["variable_names"]
    nsamp = network_spec["nsamp"]

    # NOTE(review): np.random.normal's second argument is the standard
    # deviation (scale), so ``variance`` is used as a standard deviation
    # here despite its name -- confirm the intended meaning.
    df = pd.DataFrame(np.random.normal(network_spec["noise_mean"],
                                       network_spec["variance"],
                                       size=(nsamp, len(variable_names))),
                      columns=variable_names)

    # generating functions overwrite the noise column with uniform draws
    for var_name, min_v, max_v in network_spec["generating_functions"]:
        df[var_name] = np.random.uniform(float(min_v), float(max_v), nsamp)

    # snapshot of pure noise + generating functions, before dependencies
    noise_and_generating = df.copy()
    edges = set()
    for end_node, expression in network_spec["dependencies"]:
        # Pair every referenced variable with the *whole* end-node name.
        # (itertools.product(start_nodes, end_node) would iterate over the
        # characters of the end_node string.)
        # NOTE(review): membership is a plain substring test, so a variable
        # named "a" also matches an expression that mentions "alpha".
        edges.update((variable_name, end_node)
                     for variable_name in variable_names
                     if variable_name in expression)
        # SECURITY: the expression is evaluated by DataFrame.eval -- only
        # pass trusted specifications.
        df[end_node] += df.eval(expression)

    Bayesian_network = CausalGraphicalModel(nodes=variable_names,
                                            edges=list(edges))
    return df, Bayesian_network, noise_and_generating
Ejemplo n.º 9
0
pl.plot(A, D, color='k', alpha=0.1)

pl.plot(A, D, color='k', alpha=0.1)

pl.plot(A, D, color='k', alpha=0.1)

d['M'] = scale(d.Marriage.values, with_std=False)

with pm.Model() as m5_2:
    a = pm.Normal('a', 0, 0.2)
    bM = pm.Normal('bM', 0, 0.5)
    mu = a + bM * d.M
    sigma = pm.Exponential('sigma', lam=1)
    D = pm.Normal('D', mu, sigma, observed=d.D)

div_dag = CausalGraphicalModel(nodes=['A', 'M', 'D'],
                               edges=[('A', 'M'), ('A', 'D'), ('M', 'D')])
div_dag2 = CausalGraphicalModel(nodes=['A', 'M', 'D'],
                                edges=[('A', 'M'), ('A', 'D')])

with m5_1:
    trace_5_1 = pm.sample(1000, tune=1000)
with m5_2:
    trace_5_2 = pm.sample(1000, tune=1000)
pm.forestplot(trace_5_1, varnames=['a', 'bA', 'sigma'])
pm.forestplot(trace_5_2, varnames=['a', 'bM', 'sigma'])
# The above is consistent with two causal DAGS:
div_dag.draw()
# and
div_dag2.draw()
"""
We need a model that CONTROLS FOR A while assessing the association between M and D
causal_graph = CausalGraphicalModel(
    nodes=[
        "Travel_Time",
        "Travel_Distance",
        "Travel_Cost",
        "Cross_Bay_Bridge",
        "HH_Size",
        "num_of_kids_household",
        "Autos_per_licensed_drivers",
        "Gender",
        "Mode_Choice",
    ],
    edges=[
        ("Travel_Time", "Mode_Choice"),
        ("Travel_Distance", "Mode_Choice"),
        ("Travel_Cost", "Mode_Choice"),
        ("Cross_Bay_Bridge", "Mode_Choice"),
        ("HH_Size", "Mode_Choice"),
        ("num_of_kids_household", "Mode_Choice"),
        ("Autos_per_licensed_drivers", "Mode_Choice"),
        ("Gender", "Mode_Choice"),
        ("Travel_Distance", "Travel_Time"),
        ("Travel_Distance", "Travel_Cost"),
        ("Travel_Distance", "Cross_Bay_Bridge"),
        ("HH_Size", "Travel_Distance"),
        #         ("Travel_Time", "Mode_Choice"),
        #         ("Travel_Time", "Mode_Choice"),
    ],
)
Ejemplo n.º 11
0
                                        "info")
        simulator.registerNormalSampler(7000, 1000)
        simulator.registerNormalSampler(7000, 1000)
        simulator.registerGammaSampler(1.0, .05)
        simulator.registerDiscreteRejectSampler(0.0, 0.20, 0.04, 25, 30, 18,
                                                10, 5, 2)
        simulator.run()

    elif op == "grmo":
        bo = CausalGraphicalModel(nodes=[
            "dem", "prevDem", "partMarg", "prodDownTm", "partOrd", "boPartOrd",
            "prCap", "boPrCap", "bo", "profit"
        ],
                                  edges=[("dem", "boPartOrd"),
                                         ("prevDem", "partOrd"),
                                         ("partMarg", "partOrd"),
                                         ("partOrd", "boPartOrd"),
                                         ("prodDownTm", "prCap"),
                                         ("prCap", "boPrCap"),
                                         ("dem", "boPrCap"),
                                         ("boPartOrd", "bo"),
                                         ("boPrCap", "bo")])
        bo.draw()
        plt.show()

    elif op == "train":
        prFile = sys.argv[2]
        regressor = FeedForwardNetwork(prFile)
        regressor.buildModel()
        FeedForwardNetwork.batchTrain(regressor)
Ejemplo n.º 12
0
    "Cross Bay Trip",
    "Number of Licensed Drivers",
    "Number of Autos",
    "Utility (Drive Alone)",
]

EDGES_IND = [
    ("Total Travel Distance", "Utility (Drive Alone)"),
    ("Total Travel Time", "Utility (Drive Alone)"),
    ("Total Travel Cost", "Utility (Drive Alone)"),
    ("Cross Bay Trip", "Utility (Drive Alone)"),
    ("Number of Licensed Drivers", "Utility (Drive Alone)"),
    ("Number of Autos", "Utility (Drive Alone)"),
]

IND_UTILITY = CausalGraphicalModel(NODES_IND, EDGES_IND)

# Drive Alone
NODES_DA = [
    "Total Travel Distance",
    "Total Travel Time",
    "Total Travel Cost",
    "Cross Bay Trip",
    "Number of Autos",
    "Number of Licensed Drivers",
    "Utility (Drive Alone)",
]

EDGES_DA = [
    ("Total Travel Distance", "Total Travel Time"),
    ("Total Travel Distance", "Total Travel Cost"),
# -

# ## Assumed causal graph

# +
drive_alone_graph = CausalGraphicalModel(
    nodes=[
        'total_travel_time',
        'total_travel_cost',
        'total_travel_distance',  #'household_income',
        'household_size',
        'num_cars',
        'cross_bay',
        'utility_driving'
    ],
    edges=[
        ("total_travel_time", "utility_driving"),
        ("total_travel_cost", "utility_driving"),
        ("total_travel_distance", "utility_driving"),
        ("household_size", "utility_driving"),
        #         ("household_income", "utility_driving"),
        ("num_cars", "utility_driving"),
        ("cross_bay", "utility_driving"),
        ("total_travel_distance", "total_travel_time"),
        ("total_travel_distance", "total_travel_cost"),
    ])

# draw() returns a graphviz `dot` object, which Jupyter can render
drive_alone_graph.draw()

# -
Ejemplo n.º 14
0
    parking_congestion,
    parking_capacity,
    previous_arrivals,
    explanatory_features,
    utility,
    choice,
]

edges = [
    (parking_duration, parking_cost),
    (vehicle_type, parking_fines),
    (parking_tariffs, parking_cost),
    (parking_fines, parking_cost),
    (parking_duration, parking_fines),
    (vehicle_type, choice_set),
    (choice_set, choice),
    (parking_cost, explanatory_features),
    (explanatory_features, utility),
    (utility, choice),
    (vehicle_attributes, explanatory_features),
    (parking_congestion, explanatory_features),
    (parking_capacity, parking_congestion),
    (previous_arrivals, parking_congestion),
]

latent_edges = [(parking_congestion, utility)]

PARKING_CAUSAL_MODEL = CausalGraphicalModel(
    nodes=nodes, edges=edges, latent_edges=latent_edges
)
Ejemplo n.º 15
0
"""

import numpy as np
import pandas as pd
import pymc3 as pm
import arviz as ar
from sklearn.preprocessing import scale
import matplotlib.pyplot as pl
from causalgraphicalmodels import CausalGraphicalModel
from cmocean.cm import balance_r
from seaborn import heatmap
"""
Example: infer direct influence of both parents (P) and grand parents (G) on the
educational achievement of children (C).
"""
dag_ed1 = CausalGraphicalModel(nodes=['P', 'G', 'C'],
                               edges=[('G', 'P'), ('G', 'C'), ('P', 'C')])
dag_ed1.draw()
"""
But we suppose there are unmeasured, common influences on parents and their
children (e.g. neighborhoods, not shared by grandparents who live elsewhere).
"""

dag_ed2 = CausalGraphicalModel(nodes=['G', 'P', 'C', 'U'],
                               edges=[('G', 'P'), ('U', 'P'), ('G', 'C'),
                                      ('P', 'C'), ('U', 'C')])
dag_ed2.draw()
"""
The DAG above implies that:
(1) P is some function of G and U
(2) C is some function of G, P, and U
(3) G and U are not functions of any other known variables.
Ejemplo n.º 16
0
# -*- coding: utf-8 -*-
"""
A graphical model to demonstrate conditional independence. Shows a graph that
assumes travel cost is conditionally independent of travel time, conditional on
travel distance.
"""
from causalgraphicalmodels import CausalGraphicalModel

# Node labels used by the graph definition below.
TRAVEL_TIME = "Travel Time"
TRAVEL_COST = "Travel Cost"
TRAVEL_DISTANCE = "Travel Distance"

# Travel distance is the only parent of both travel time and travel cost;
# there is no direct edge between time and cost.
EXAMPLE_GRAPH = CausalGraphicalModel(
    nodes=[TRAVEL_TIME, TRAVEL_COST, TRAVEL_DISTANCE],
    edges=[(TRAVEL_DISTANCE, TRAVEL_TIME), (TRAVEL_DISTANCE, TRAVEL_COST)],
)
import daft
import matplotlib.pyplot as plt
from causalgraphicalmodels import CausalGraphicalModel

# DAG with edges H0 -> H1, F -> H1 and T -> F.
plant_dag = CausalGraphicalModel(nodes=["H0", "H1", "F", "T"],
                                 edges=[("H0", "H1"), ("F", "H1"), ("T", "F")])
pgm = daft.PGM()
# Hand-picked drawing position for every node (all share the second value 0).
coordinates = {"H0": (0, 0), "T": (4, 0), "F": (3, 0), "H1": (2, 0)}
for node in plant_dag.dag.nodes:
    # the node name is used both as identifier and as label
    pgm.add_node(node, node, *coordinates[node])
for edge in plant_dag.dag.edges:
    pgm.add_edge(*edge)
pgm.render()
# flip the y-axis of the current axes before showing the figure
plt.gca().invert_yaxis()
plt.show()
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 29 09:55:00 2020

@author: Mhuth
"""

from causalgraphicalmodels import CausalGraphicalModel

# Node names of the causal graph.
nod = [
    'region', 'oral', 'sales bans', 'time', 'ideas about children', 'controls'
]

# Directed edges as (parent, child) pairs.
ed = [('region', 'oral'), ('region', 'sales bans'),
      ('region', 'ideas about children'), ('sales bans', 'oral'),
      ('time', 'oral'), ('time', 'ideas about children'), ('time', 'controls'),
      ('ideas about children', 'oral')]
cg1 = CausalGraphicalModel(nodes=nod, edges=ed)

# draw the graph (result is not stored; notebook-style display)
cg1.draw()
Ejemplo n.º 19
0
# +
SEED = 10
np.random.seed(SEED)
tf.random.set_random_seed(SEED)

data_generation_graph = CausalGraphicalModel(
    nodes=['a', 'b', 'c', 'confounder', 'd', 'e', 'f', 'y'],
    edges=[
        ("a", "y"),
        ("b", "y"),
        ("c", "y"),
        ("d", "y"),
        ("e", "y"),
        ("f", "y"),
        ("confounder", "y"),
        ("confounder", "a"),
        ("confounder", "b"),
        #         ("confounder", "c"),
        ("confounder", "d"),
        ("confounder", "e"),
        ("confounder", "f"),

        #         ("confounder", "y"),
        #         ("confounder", "y"),
    ])

# draw() returns a graphviz `dot` object, which Jupyter can render
data_generation_graph.draw()
# -

# ## Specify the parametric relationship between the covariates.
Ejemplo n.º 20
0
pm.forestplot([trace_7, trace_6, trace_5], models=['m7', 'm6', 'm5'],
             varnames=['bM', 'bN'], rhat=False, alpha=0.11);
pd.plotting.scatter_matrix(dcc[['M', 'N', 'K']]);

"""
The regression model (m5_7) asks if high N is associated with high K. Likewise m5_7 asks whether
high M implies high K. Bigger species like apes (big M) have milk with less energy. But spp with
more neocortex (big N) have richer milk (big K). The fact that M and N are correlated makes these
relationships difficult to see unless both factors are accounted for.
----o----

Simulating a Masking Relationship. Two predictors (M, N) are correlated with one another, and one (N)
is positively associated with the target (K) while the other (M) is negatively associated with K
(in the simulation below K is drawn around N - M).
"""
div_msk = CausalGraphicalModel(nodes=['M', 'N', 'K'],
                               edges=[('M', 'K'), ('N', 'K'), ('M', 'N')])
div_msk.draw()

# Simulate the masking relationship: N is drawn around M, and K is drawn
# around N - M.
n = 100
M = np.random.normal(size=n)
N = np.random.normal(loc=M, size=n)
K = np.random.normal(loc=N-M, size=n)

d_sim = pd.DataFrame(dict(K=K, M=M, N=N))
# NOTE(review): the original call ended in a dangling ``diagonal=`` (a
# SyntaxError). The intended value is not recoverable from this file; 'hist'
# is the pandas default, 'kde' is the other supported option.
pd.plotting.scatter_matrix(d_sim, alpha=0.5, diagonal='hist');


with pm.Model() as m5_sim:
    a = pm.Normal('a', 0, 0.2)
    bN = pm.Normal('bN', 0, 0.5)
    σ = pm.Exponential('sigma', 1)
Ejemplo n.º 21
0
"""
Treatment appears to have negligible effect even though βF posterior indicates fungus impacts
growth.
The problem is that fungus is a consequence of treatment; i.e. fungus is a post-treatment variable.
The model asked the question "Once we know fungus is present does treatment matter?" ⇒ No.
The next model ignores the fungus variable
"""

with pm.Model() as m8:
    σ = pm.Exponential('σ', 1)
    α = pm.Lognormal('α', 0, 0.2)
    βT = pm.Normal('βT', 0, 0.5)
    p = α + βT * d.treatment.values
    μ = d.h0.values * p
    h1 = pm.Normal('h1', mu=μ, sd=σ, observed=d.h1.values)
    trc8 = pm.sample(tune=1000)

pm.summary(trc8)
"""
Now the treatment effect is plain to see. Note that:
1. It makes sense to control for pre-treatment differences such as initial height, h0, here.
2. Including post-treatment variables can mask the treatment itself.
3. Note that model m7 is still useful to identify the causal mechanism!
"""
# DAG with edges H0 -> H1, T -> F and F -> H1: T reaches H1 only through F.
plant_dag = CausalGraphicalModel(nodes=['H0', 'H1', 'T', 'F'],
                                 edges=[('H0', 'H1'), ('T', 'F'), ('F', 'H1')])
plant_dag.draw()
# d-separation queries; the results are not stored (notebook-style display)
plant_dag.is_d_separated('T', 'H1')
# presumably the third argument is the conditioning set -- TODO confirm
plant_dag.is_d_separated('T', 'H1', 'F')
plant_dag.get_all_independence_relationships()
Ejemplo n.º 22
0
# Compute conditional independencies from a directed graphical model

# Uses this library
# https://github.com/ijmbarr/causalgraphicalmodels

# Code is based on
# https://fehiepsi.github.io/rethinking-numpyro/06-the-haunted-dag-and-the-causal-terror.html

from causalgraphicalmodels import CausalGraphicalModel

# Example DAG; each edge is a (parent, child) pair.
dag = CausalGraphicalModel(
    nodes=["X", "Y", "C", "U", "B", "A"],
    edges=[
        ("X", "Y"),
        ("U", "X"),
        ("A", "U"),
        ("A", "C"),
        ("C", "Y"),
        ("U", "B"),
        ("C", "B"),
    ],
)

# Ask the library for the independence relationships of the graph and
# print them, followed by blank lines.
all_independencies = dag.get_all_independence_relationships()
print(all_independencies)
print('\n')
strong-ignorability (`DRIVE_ALONE_UTILITY`) and under the assumptions of
unobserved confounding (`LATENT_DRIVE_ALONE_UTILITY`) from "The Blessings of
Multiple Causes" (2018) by Wang and Blei.
"""
from causalgraphicalmodels import CausalGraphicalModel

DRIVE_ALONE_UTILITY = CausalGraphicalModel(
    nodes=[
        "Total Travel Distance",
        "Total Travel Time",
        "Total Travel Cost",
        "Number of Autos",
        "Number of Licensed Drivers",
        "Utility (Drive Alone)",
    ],
    edges=[
        ("Total Travel Distance", "Total Travel Time"),
        ("Total Travel Distance", "Total Travel Cost"),
        ("Total Travel Distance", "Utility (Drive Alone)"),
        ("Total Travel Time", "Utility (Drive Alone)"),
        ("Total Travel Cost", "Utility (Drive Alone)"),
        ("Number of Autos", "Utility (Drive Alone)"),
        ("Number of Licensed Drivers", "Utility (Drive Alone)"),
    ],
)

drive_alone_nodes = list(DRIVE_ALONE_UTILITY.dag.nodes)
nodes_for_latent_graph = ["confounder"] + drive_alone_nodes
edges_for_latent_graph = [("confounder", x) for x in drive_alone_nodes] + [
    (x, "Utility (Drive Alone)") for x in drive_alone_nodes[:-1]
]