예제 #1
0
    def construct_net(self, df):
        """Build the cancer network with every node observed from ``df``.

        Each node reads its observed values from the attribute of ``df`` that
        has the same name as the node.  Returns a ``pymc.Model`` holding all
        eleven nodes.
        """
        # (node name, parent names) in creation order.  An empty parent tuple
        # marks a root node, which is created with make_bernoulli.
        topology = (
            ('anxiety', ()),
            ('peer_pressure', ()),
            ('smoking', ('anxiety', 'peer_pressure')),
            ('yellow_fingers', ('smoking',)),
            ('genetics', ()),
            ('lung_cancer', ('smoking', 'genetics')),
            ('allergy', ()),
            ('coughing', ('allergy', 'lung_cancer')),
            ('fatigue', ('lung_cancer', 'coughing')),
            ('attention_disorder', ('genetics',)),
            ('car_accident', ('fatigue', 'attention_disorder')),
        )

        built = {}
        for node_name, parent_names in topology:
            observed = getattr(df, node_name)
            if parent_names:
                built[node_name] = cartesian_child(
                    node_name,
                    parents=[built[parent] for parent in parent_names],
                    value=observed)
            else:
                built[node_name] = make_bernoulli(node_name, value=observed)

        # The model lists the nodes in the same order they were created.
        return pymc.Model([built[entry[0]] for entry in topology])
예제 #2
0
def test_cartesian_bernoulli_child_of_categorical_parent():
    """Round-trip a Bernoulli child of a categorical and a Bernoulli parent.

    Data is sampled from a model whose coefficients are fixed to ``coeffs``;
    the same network is then rebuilt with that data as observed values and
    sampled again.  The posterior mean of one conditional coefficient must lie
    within 0.3 of the value that generated the data.
    """
    coeffs = {
        'CF: p(feeling_sick)': 0.55,
        'CF: p(day_of_week)': [0.013, 0.626, 0.039, 0.108, 0.134, 0.019],
        'CF: p(staying_home | day_of_week=5 feeling_sick=1)': 0.240,
        'CF: p(staying_home | day_of_week=3 feeling_sick=1)': 0.467,
        'CF: p(staying_home | day_of_week=4 feeling_sick=1)': 0.603,
        'CF: p(staying_home | day_of_week=0 feeling_sick=0)': 0.974,
        'CF: p(staying_home | day_of_week=6 feeling_sick=1)': 0.331,
        'CF: p(staying_home | day_of_week=6 feeling_sick=0)': 0.009,
        'CF: p(staying_home | day_of_week=2 feeling_sick=1)': 0.317,
        'CF: p(staying_home | day_of_week=0 feeling_sick=1)': 0.900,
        'CF: p(staying_home | day_of_week=2 feeling_sick=0)': 0.651,
        'CF: p(staying_home | day_of_week=1 feeling_sick=0)': 0.603,
        'CF: p(staying_home | day_of_week=1 feeling_sick=1)': 0.954,
        'CF: p(staying_home | day_of_week=3 feeling_sick=0)': 0.856,
        'CF: p(staying_home | day_of_week=5 feeling_sick=0)': 0.606,
        'CF: p(staying_home | day_of_week=4 feeling_sick=0)': 0.828
    }
    # Seed NumPy's global RNG by *calling* seed().  Assigning to
    # ``np.random.seed`` would replace the function with an int, seed nothing
    # and break any later ``np.random.seed(...)`` call in the same process.
    # NOTE(review): assumes the sampler draws from NumPy's global RNG - confirm.
    np.random.seed(1)

    # define the model with fixed *coefficients*
    day_of_week = make_categorical('day_of_week', levels=7, N=1, fixed=coeffs)
    feeling_sick = make_bernoulli('feeling_sick', N=1, fixed=coeffs)
    staying_home = cartesian_bernoulli_child('staying_home',
                                             [day_of_week, feeling_sick],
                                             N=1,
                                             fixed=coeffs)
    model = pymc.Model([day_of_week, feeling_sick, staying_home])

    # sample data from the model
    sampler = pymc.MCMC(model)
    sampler.sample(iter=500)
    day_of_week_val = sampler.trace('day_of_week')[:]
    feeling_sick_val = sampler.trace('feeling_sick')[:]
    staying_home_val = sampler.trace('staying_home')[:]

    # define the model again, this time with fixed *data*; the coefficient
    # nodes are returned so they can be added to the model and traced
    day_of_week, c1 = make_categorical('day_of_week',
                                       levels=7,
                                       value=day_of_week_val,
                                       return_coeffs=True)
    feeling_sick, c2 = make_bernoulli('feeling_sick',
                                      value=feeling_sick_val,
                                      return_coeffs=True)
    staying_home, c3 = cartesian_bernoulli_child('staying_home',
                                                 [day_of_week, feeling_sick],
                                                 value=staying_home_val,
                                                 return_coeffs=True)
    model = pymc.Model([day_of_week, feeling_sick, staying_home] + c1 + c2 +
                       c3)
    sampler = pymc.MCMC(model)
    sampler.sample(iter=500, burn=300)

    cname = 'CF: p(staying_home | day_of_week=1 feeling_sick=0)'
    print(cname)
    assert np.isclose(coeffs[cname], sampler.trace(cname)[:].mean(), atol=0.3)

    # Diagnostic dump: true value next to posterior mean for every coefficient.
    # A single pre-formatted string prints the same on Python 2 and 3.
    for c in coeffs:
        print("%s %s %s" % (c, coeffs[c], sampler.trace(c)[:].mean()))
예제 #3
0
    def construct_net(self, df):
        """Build the rain / sprinkler / wet-sidewalk network from ``df``.

        ``rain`` and ``sprinkler`` are root nodes and ``wet_sidewalk`` depends
        on both; each node is observed through the ``df`` attribute of the
        same name.  Returns the ``pymc.Model`` over the three nodes.
        """
        causes = [
            make_bernoulli(column, value=getattr(df, column))
            for column in ('rain', 'sprinkler')
        ]
        effect = cartesian_child('wet_sidewalk',
                                 parents=list(causes),
                                 value=df.wet_sidewalk)
        return pymc.Model(causes + [effect])
예제 #4
0
def test_cartesian_categorical_child_creates_correctly_named_coefficients():
    """A 4-level categorical child of two Bernoulli parents names its
    coefficients after every combination of parent values."""
    parent_nodes = [make_bernoulli(who, N=1) for who in ('mum', 'dad')]
    returned = cartesian_categorical_child('child', parent_nodes,
                                           levels=4,
                                           N=1,
                                           return_coeffs=True)
    # Only the coefficient list (second element) matters for this test.
    coefficient_nodes = returned[1]

    expected_names = set([
        'CF: p(child | mum=0 dad=0)',
        'CF: p(child | mum=1 dad=1)',
        'CF: p(child | mum=1 dad=0)',
        'CF: p(child | mum=0 dad=1)',
        'p(child)',
    ])
    actual_names = set(str(node) for node in coefficient_nodes)
    assert actual_names == expected_names
예제 #5
0
    def construct_net(self, df):
        """Build the rain / sprinkler / grass_wet network for inference.

        The topology matches the one used to generate the data, but it is set
        up for learning rather than sampling:
        - no coefficients are fixed, since the model is meant to learn them;
        - every node receives ``value`` because it is observed;
        - ``N`` is omitted because it is taken to be ``len(value)``.

        Returns the ``pymc.Model`` over the three nodes.
        """
        observed_roots = []
        for column in ('rain', 'sprinkler'):
            observed_roots.append(
                make_bernoulli(column, value=getattr(df, column)))

        wet = cartesian_child('grass_wet',
                              parents=list(observed_roots),
                              levels=2,
                              value=df.grass_wet)

        return pymc.Model(observed_roots + [wet])
예제 #6
0
def test_make_bernoulli_returns_variable_with_beta_parent():
    """An observed Bernoulli from make_bernoulli has an unobserved Beta node,
    named with the coefficient prefix, as its ``p`` parent."""
    var_name = 'rosebud'
    observed_var = make_bernoulli(var_name, value=[0, 1, 1, 0])
    p_node = observed_var.parents['p']
    expected_label = COEFFS_PREFIX + 'p({0})'.format(var_name)

    assert observed_var.observed
    assert not p_node.observed
    assert str(p_node) == expected_label
    assert isinstance(p_node, pymc.distributions.Beta)
예제 #7
0
def test_cartesian_bernoulli_child_creates_correctly_named_coefficients():
    """Coefficient names of a Bernoulli child spell out each combination of
    parent values, for both two parents and a single parent."""
    # Case 1: two Bernoulli parents.
    mother = make_bernoulli('mum', N=1)
    father = make_bernoulli('dad', N=1)
    two_parent_result = cartesian_bernoulli_child('child', [mother, father],
                                                  N=1,
                                                  return_coeffs=True)
    two_parent_names = set(str(node) for node in two_parent_result[1])
    assert two_parent_names == set([
        'CF: p(child | mum=0 dad=0)',
        'CF: p(child | mum=1 dad=1)',
        'CF: p(child | mum=1 dad=0)',
        'CF: p(child | mum=0 dad=1)',
        'p(child)',
    ])

    # Case 2: a single Bernoulli parent.
    lone_parent = make_bernoulli('single_parent', N=1)
    one_parent_result = cartesian_bernoulli_child('child', [lone_parent],
                                                  N=1,
                                                  return_coeffs=True)
    one_parent_names = set(str(node) for node in one_parent_result[1])
    assert one_parent_names == set([
        'p(child)',
        'CF: p(child | single_parent=0)',
        'CF: p(child | single_parent=1)',
    ])
예제 #8
0
def get_rsw_data_random_coeffs(size):
    """Sample data from a 'rain - sprinkler - wet grass' Bayesian network
    whose coefficients are themselves drawn at random.

    The network is built once with free coefficients to obtain a coefficient
    sample via ``sample_coeffs``, then rebuilt with those values fixed and
    sampled with MCMC (1000 burn-in iterations followed by ``size`` kept
    iterations).  Returns a ``pandas.DataFrame`` with one column per
    stochastic whose name does not start with ``'CF: '``.
    """

    def wire_up(**extra):
        # Same three nodes for both passes; ``extra`` is either empty or
        # carries ``fixed=<coefficients>``.
        rain_node = make_bernoulli('rain', N=1, **extra)
        sprinkler_node = cartesian_child('sprinkler',
                                         parents=[rain_node],
                                         levels=2,
                                         N=1,
                                         **extra)
        wet_node = cartesian_child('grass_wet',
                                   parents=[sprinkler_node, rain_node],
                                   levels=2,
                                   N=1,
                                   **extra)
        return pymc.Model([rain_node, sprinkler_node, wet_node])

    # Pass 1: free coefficients, used only to draw one set of values.
    drawn = sample_coeffs(wire_up())

    # Pass 2: the actual data-generating net with those values pinned.
    mcmc = pymc.MCMC(wire_up(fixed=drawn))
    burn_in = 1000
    mcmc.sample(iter=size + burn_in, burn=burn_in)

    columns = {}
    for node in mcmc.stochastics:
        label = str(node)
        if label.startswith('CF: '):
            continue
        # ``+ 0`` is kept from the original expression.
        columns[label] = mcmc.trace(label)[:].ravel() + 0
    return pd.DataFrame(columns)
예제 #9
0
def test_cartesian_bernoulli_child():
    """Coefficients drawn from the prior are recovered from sampled data.

    Three stages: draw coefficient values from a data-free model, sample data
    from the same model with those values fixed, then fit the model to that
    data and compare each posterior mean against the drawn value.
    """
    # Stage 1: no data, so every coefficient can be sampled from its prior.
    garden = make_bernoulli('has_garden', N=1)
    big = make_bernoulli('is_big', N=1)
    green = cartesian_bernoulli_child('is_green', [big, garden], N=1)
    true_coeffs = sample_coeffs(pymc.Model([garden, big, green]))

    # Stage 2: identical structure with the coefficient values fixed; sample
    # data from it.
    garden = make_bernoulli('has_garden', N=1, fixed=true_coeffs)
    big = make_bernoulli('is_big', N=1, fixed=true_coeffs)
    green = cartesian_bernoulli_child('is_green', [big, garden],
                                      N=1,
                                      fixed=true_coeffs)
    data_sampler = pymc.MCMC(pymc.Model([garden, big, green]))
    data_sampler.sample(iter=2000)

    garden_data = data_sampler.trace('has_garden')[:]
    big_data = data_sampler.trace('is_big')[:]
    green_data = data_sampler.trace('is_green')[:]

    # Stage 3: identical structure again, this time with the sampled data as
    # observed values and the coefficients left free.
    garden, garden_cfs = make_bernoulli('has_garden',
                                        value=garden_data,
                                        return_coeffs=True)
    big, big_cfs = make_bernoulli('is_big',
                                  value=big_data,
                                  return_coeffs=True)
    green, green_cfs = cartesian_bernoulli_child('is_green', [big, garden],
                                                 value=green_data,
                                                 return_coeffs=True)

    fit_sampler = pymc.MCMC(pymc.Model(garden_cfs + big_cfs + green_cfs))
    fit_sampler.sample(iter=2000, burn=1000)

    for stochastic in fit_sampler.stochastics:
        label = str(stochastic)
        posterior_mean = fit_sampler.trace(label)[:].mean()
        expected = true_coeffs[label]
        report = "%s  %.3f   %.3f" % (pad(label, 30), posterior_mean, expected)
        print(report)
        assert np.isclose(posterior_mean, expected, rtol=0.1, atol=0.1)
예제 #10
0
def get_rsw_data(size):
    """Sample data from the 'rain - sprinkler - wet grass' Bayesian network
    with hand-picked coefficients.

    ``size`` is the number of MCMC iterations; nothing is burned.  Returns a
    ``pandas.DataFrame`` with one column per stochastic of the sampler.
    """
    fixed_probabilities = {
        'CF: p(rain)': 0.2,
        'CF: p(grass_wet | sprinkler=0 rain=0)': 0.,
        'CF: p(grass_wet | sprinkler=0 rain=1)': 0.8,
        'CF: p(grass_wet | sprinkler=1 rain=0)': 0.9,
        'CF: p(grass_wet | sprinkler=1 rain=1)': 0.99,
        'CF: p(sprinkler | rain=0)': 0.4,
        'CF: p(sprinkler | rain=1)': 0.01
    }

    # Wire up the three nodes with the coefficients pinned.
    rain_node = make_bernoulli('rain', N=1, fixed=fixed_probabilities)
    sprinkler_node = cartesian_child('sprinkler',
                                     parents=[rain_node],
                                     levels=2,
                                     N=1,
                                     fixed=fixed_probabilities)
    wet_node = cartesian_child('grass_wet',
                               parents=[sprinkler_node, rain_node],
                               levels=2,
                               N=1,
                               fixed=fixed_probabilities)

    # Draw ``size`` samples from the fixed-coefficient model.
    mcmc = pymc.MCMC(pymc.Model([rain_node, sprinkler_node, wet_node]))
    mcmc.sample(iter=size, burn=0)

    columns = {}
    for node in mcmc.stochastics:
        label = str(node)
        # ``+ 0`` is kept from the original expression.
        columns[label] = mcmc.trace(label)[:].ravel() + 0
    return pd.DataFrame(columns)
예제 #11
0
    def construct_net(self, df):
        """Build the car network with every node observed from ``df``.

        ``mileage`` and ``age`` are root nodes; every other node is a
        ``cartesian_child`` of the parents listed below.  Each node reads its
        observed values from the ``df`` attribute of the same name.  Returns
        the ``pymc.Model`` over all 27 nodes.
        """
        bernoulli_roots = ('mileage', 'age')

        # (node name, parent names), listed in creation order.
        dependents = (
            ('socio_econ', ('age',)),
            ('risk_aversion', ('age', 'socio_econ')),
            ('senior_train', ('age', 'risk_aversion')),
            ('good_student', ('age', 'socio_econ')),
            ('extra_car', ('socio_econ',)),
            ('driving_skill', ('age', 'senior_train')),
            ('driving_hist', ('driving_skill', 'risk_aversion')),
            ('driving_quality', ('driving_skill', 'risk_aversion')),
            ('make_model', ('risk_aversion', 'socio_econ')),
            ('vehicle_year', ('socio_econ',)),
            ('antilock', ('make_model', 'vehicle_year')),
            ('ruggedness', ('make_model', 'antilock')),
            ('accident', ('driving_quality', 'antilock', 'mileage')),
            ('airbag', ('make_model', 'vehicle_year')),
            ('car_value', ('make_model', 'mileage', 'vehicle_year')),
            ('home_base', ('risk_aversion', 'socio_econ')),
            ('anti_theft', ('risk_aversion', 'socio_econ')),
            ('theft', ('car_value', 'home_base', 'anti_theft')),
            ('own_damage', ('ruggedness', 'accident')),
            ('own_car_cost', ('own_damage', 'car_value', 'theft')),
            ('cushioning', ('ruggedness', 'accident')),
            ('medical_cost', ('age', 'cushioning', 'accident')),
            ('liability_cost', ('accident',)),
            ('other_car_cost', ('ruggedness', 'accident')),
            ('property_cost', ('other_car_cost', 'own_car_cost')),
        )

        # Order in which the nodes are handed to pymc.Model; it differs from
        # the creation order above.
        listing = (
            'mileage', 'age', 'socio_econ', 'good_student', 'extra_car',
            'driving_skill', 'risk_aversion', 'senior_train', 'driving_hist',
            'driving_quality', 'make_model', 'vehicle_year', 'antilock',
            'ruggedness', 'accident', 'airbag', 'car_value', 'home_base',
            'anti_theft', 'theft', 'own_damage', 'own_car_cost', 'cushioning',
            'medical_cost', 'liability_cost', 'other_car_cost',
            'property_cost',
        )

        net = {}
        for column in bernoulli_roots:
            net[column] = make_bernoulli(column, value=getattr(df, column))
        for column, parent_columns in dependents:
            net[column] = cartesian_child(
                column,
                parents=[net[parent] for parent in parent_columns],
                value=getattr(df, column))

        return pymc.Model([net[column] for column in listing])
예제 #12
0
def get_car_data_random_coeffs(size):
    """Sample data from the car Bayesian network with random coefficients.

    The network is built twice from one shared topology table: first with
    free coefficients, so a single set of coefficient values can be drawn
    with ``sample_coeffs``, and then with *every* node's coefficients fixed
    to those values.  The second model is sampled with MCMC.

    Building both passes from the same table guarantees that no node is left
    without ``fixed=`` in the second pass (previously ``driving_skill`` was,
    so its coefficients were not pinned to the sampled values).

    Args:
        size: number of MCMC iterations to run (no burn-in).

    Returns:
        A ``pandas.DataFrame`` with one column per stochastic of the sampler
        whose name does not start with ``'CF: '``, filled from the flattened
        trace of that stochastic.
    """
    root_names = ('mileage', 'age')

    # (node name, parent names), listed in creation order.
    child_specs = (
        ('socio_econ', ('age',)),
        ('risk_aversion', ('age', 'socio_econ')),
        ('senior_train', ('age', 'risk_aversion')),
        ('good_student', ('age', 'socio_econ')),
        ('extra_car', ('socio_econ',)),
        ('driving_skill', ('age', 'senior_train')),
        ('driving_hist', ('driving_skill', 'risk_aversion')),
        ('driving_quality', ('driving_skill', 'risk_aversion')),
        ('make_model', ('risk_aversion', 'socio_econ')),
        ('vehicle_year', ('socio_econ',)),
        ('antilock', ('make_model', 'vehicle_year')),
        ('ruggedness', ('make_model', 'antilock')),
        ('accident', ('driving_quality', 'antilock', 'mileage')),
        ('airbag', ('make_model', 'vehicle_year')),
        ('car_value', ('make_model', 'mileage', 'vehicle_year')),
        ('home_base', ('risk_aversion', 'socio_econ')),
        ('anti_theft', ('risk_aversion', 'socio_econ')),
        ('theft', ('car_value', 'home_base', 'anti_theft')),
        ('own_damage', ('ruggedness', 'accident')),
        ('own_car_cost', ('own_damage', 'car_value', 'theft')),
        ('cushioning', ('ruggedness', 'accident')),
        ('medical_cost', ('age', 'cushioning', 'accident')),
        ('liability_cost', ('accident',)),
        ('other_car_cost', ('ruggedness', 'accident')),
        ('property_cost', ('other_car_cost', 'own_car_cost')),
    )

    # Order in which the nodes are handed to pymc.Model (unchanged from the
    # hand-written lists this table replaces).
    model_order = (
        'mileage', 'age', 'socio_econ', 'good_student', 'extra_car',
        'driving_skill', 'risk_aversion', 'senior_train', 'driving_hist',
        'driving_quality', 'make_model', 'vehicle_year', 'antilock',
        'ruggedness', 'accident', 'airbag', 'car_value', 'home_base',
        'anti_theft', 'theft', 'own_damage', 'own_car_cost', 'cushioning',
        'medical_cost', 'liability_cost', 'other_car_cost', 'property_cost',
    )

    def build_model(**extra):
        # ``extra`` is empty for the free-coefficient pass and carries
        # ``fixed=<coefficients>`` for the data-generating pass; it is
        # forwarded to every node constructor without exception.
        nodes = {}
        for name in root_names:
            nodes[name] = make_bernoulli(name, N=1, **extra)
        for name, parent_names in child_specs:
            nodes[name] = cartesian_child(
                name,
                parents=[nodes[parent] for parent in parent_names],
                N=1,
                **extra)
        return pymc.Model([nodes[name] for name in model_order])

    # Pass 1: draw one set of coefficient values from the priors.
    coefficients = sample_coeffs(build_model())

    # Pass 2: same network with all coefficients pinned to that draw.
    model = build_model(fixed=coefficients)

    sampler = pymc.MCMC(model)
    sampler.sample(iter=size, burn=0)

    # Keep only the data nodes; anything named like a coefficient is skipped.
    data = pd.DataFrame({
        str(node): sampler.trace(str(node))[:].ravel() + 0
        for node in sampler.stochastics if not str(node).startswith('CF: ')
    })
    return data
예제 #13
0
def get_cancer_data_random_coeffs(size):
    """Sample data from the cancer Bayesian network with random coefficients.

    The network is built once with free coefficients to draw a coefficient
    sample via ``sample_coeffs``, then rebuilt with those values fixed and
    sampled for ``size`` MCMC iterations without burn-in.  Returns a
    ``pandas.DataFrame`` with one column per stochastic of the sampler.
    """
    # (node name, parent names) in creation order, which is also the order in
    # which nodes are given to pymc.Model.  No parents means a root node
    # created with make_bernoulli.
    layout = (
        ('anxiety', ()),
        ('peer_pressure', ()),
        ('smoking', ('anxiety', 'peer_pressure')),
        ('yellow_fingers', ('smoking',)),
        ('genetics', ()),
        ('lung_cancer', ('smoking', 'genetics')),
        ('allergy', ()),
        ('coughing', ('allergy', 'lung_cancer')),
        ('fatigue', ('lung_cancer', 'coughing')),
        ('attention_disorder', ('genetics',)),
        ('car_accident', ('fatigue', 'attention_disorder')),
    )

    def assemble(**extra):
        # ``extra`` is empty on the first pass and ``fixed=<coefficients>``
        # on the second.
        made = {}
        for label, parent_labels in layout:
            if parent_labels:
                made[label] = cartesian_child(
                    label,
                    parents=[made[parent] for parent in parent_labels],
                    N=1,
                    **extra)
            else:
                made[label] = make_bernoulli(label, N=1, **extra)
        return pymc.Model([made[entry[0]] for entry in layout])

    # Pass 1: free coefficients, used only to draw one set of values.
    drawn = sample_coeffs(assemble())

    # Pass 2: generate data from the net with those values pinned.
    mcmc = pymc.MCMC(assemble(fixed=drawn))
    mcmc.sample(iter=size, burn=0)

    columns = {}
    for node in mcmc.stochastics:
        label = str(node)
        # ``+ 0`` is kept from the original expression.
        columns[label] = mcmc.trace(label)[:].ravel() + 0
    return pd.DataFrame(columns)