def construct_net(self, df): anxiety = make_bernoulli('anxiety', value=df.anxiety) peer_pressure = make_bernoulli('peer_pressure', value=df.peer_pressure) smoking = cartesian_child('smoking', parents=[anxiety, peer_pressure], value=df.smoking) yellow_fingers = cartesian_child('yellow_fingers', parents=[smoking], value=df.yellow_fingers) genetics = make_bernoulli('genetics', value=df.genetics) lung_cancer = cartesian_child('lung_cancer', parents=[smoking, genetics], value=df.lung_cancer) allergy = make_bernoulli('allergy', value=df.allergy) coughing = cartesian_child('coughing', parents=[allergy, lung_cancer], value=df.coughing) fatigue = cartesian_child('fatigue', parents=[lung_cancer, coughing], value=df.fatigue) attention_disorder = cartesian_child('attention_disorder', parents=[genetics], value=df.attention_disorder) car_accident = cartesian_child('car_accident', parents=[fatigue, attention_disorder], value=df.car_accident) # sample from the prior model = pymc.Model([ anxiety, peer_pressure, smoking, yellow_fingers, genetics, lung_cancer, allergy, coughing, fatigue, attention_disorder, car_accident ]) return model
def construct_net(self, df): rain = make_bernoulli('rain', value=df.rain) sprinkler = make_bernoulli('sprinkler', value=df.sprinkler) sidewalk = cartesian_child('wet_sidewalk', parents=[rain, sprinkler], value=df.wet_sidewalk) model = pymc.Model([rain, sprinkler, sidewalk]) return model
def get_rsw_data_random_coeffs(size): """generates a data from the famous 'rain - sprinkler - wet grass' bayesian network returns as a dataframe. """ rain = make_bernoulli('rain', N=1) sprinkler = cartesian_child('sprinkler', parents=[rain], levels=2, N=1) grass_wet = cartesian_child('grass_wet', parents=[sprinkler, rain], levels=2, N=1) # sample from the prior model = pymc.Model([rain, sprinkler, grass_wet]) coefficients = sample_coeffs(model) # define the bayesian net rain = make_bernoulli('rain', N=1, fixed=coefficients) sprinkler = cartesian_child('sprinkler', parents=[rain], levels=2, N=1, fixed=coefficients) grass_wet = cartesian_child('grass_wet', parents=[sprinkler, rain], levels=2, N=1, fixed=coefficients) # sample from the prior model = pymc.Model([rain, sprinkler, grass_wet]) sampler = pymc.MCMC(model) sampler.sample(iter=size + 1000, burn=1000) data = pd.DataFrame({ str(node): sampler.trace(str(node))[:].ravel() + 0 for node in sampler.stochastics if not str(node).startswith('CF: ') }) return data
def get_rsw_data(size): """generates a data from the famous 'rain - sprinkler - wet grass' bayesian network returns as a dataframe. """ coefficients = { 'CF: p(rain)': 0.2, 'CF: p(grass_wet | sprinkler=0 rain=0)': 0., 'CF: p(grass_wet | sprinkler=0 rain=1)': 0.8, 'CF: p(grass_wet | sprinkler=1 rain=0)': 0.9, 'CF: p(grass_wet | sprinkler=1 rain=1)': 0.99, 'CF: p(sprinkler | rain=0)': 0.4, 'CF: p(sprinkler | rain=1)': 0.01 } # define the bayesian net rain = make_bernoulli('rain', N=1, fixed=coefficients) sprinkler = cartesian_child('sprinkler', parents=[rain], levels=2, N=1, fixed=coefficients) grass_wet = cartesian_child('grass_wet', parents=[sprinkler, rain], levels=2, N=1, fixed=coefficients) # sample from the prior model = pymc.Model([rain, sprinkler, grass_wet]) sampler = pymc.MCMC(model) sampler.sample(iter=size, burn=0) data = pd.DataFrame({ str(node): sampler.trace(str(node))[:].ravel() + 0 for node in sampler.stochastics }) return data
def construct_net(self, df): # This is the same network as was used to generate the data, but there # are some differences in how it is set up: # - we don't fix the values of coefficients - the whole point is for the model to learn them! # - we provide 'value' parameter - because these variables are observed # - we don't need the 'N' parameter - because it is assumed that N = len(value) rain = make_bernoulli('rain', value=df.rain) sprinkler = make_bernoulli('sprinkler', value=df.sprinkler) grass_wet = cartesian_child('grass_wet', parents=[rain, sprinkler], levels=2, value=df.grass_wet) model = pymc.Model([rain, sprinkler, grass_wet]) return model
def construct_net(self, df): mileage = make_bernoulli('mileage', value=df.mileage) age = make_bernoulli('age', value=df.age) socio_econ = cartesian_child('socio_econ', parents=[age], value=df.socio_econ) risk_aversion = cartesian_child('risk_aversion', parents=[age, socio_econ], value=df.risk_aversion) senior_train = cartesian_child('senior_train', parents=[age, risk_aversion], value=df.senior_train) good_student = cartesian_child('good_student', parents=[age, socio_econ], value=df.good_student) extra_car = cartesian_child('extra_car', parents=[socio_econ], value=df.extra_car) driving_skill = cartesian_child('driving_skill', parents=[age, senior_train], value=df.driving_skill) driving_hist = cartesian_child('driving_hist', parents=[driving_skill, risk_aversion], value=df.driving_hist) driving_quality = cartesian_child( 'driving_quality', parents=[driving_skill, risk_aversion], value=df.driving_quality) make_model = cartesian_child('make_model', parents=[risk_aversion, socio_econ], value=df.make_model) vehicle_year = cartesian_child('vehicle_year', parents=[socio_econ], value=df.vehicle_year) antilock = cartesian_child('antilock', parents=[make_model, vehicle_year], value=df.antilock) ruggedness = cartesian_child('ruggedness', parents=[make_model, antilock], value=df.ruggedness) accident = cartesian_child( 'accident', parents=[driving_quality, antilock, mileage], value=df.accident) airbag = cartesian_child('airbag', parents=[make_model, vehicle_year], value=df.airbag) car_value = cartesian_child( 'car_value', parents=[make_model, mileage, vehicle_year], value=df.car_value) home_base = cartesian_child('home_base', parents=[risk_aversion, socio_econ], value=df.home_base) anti_theft = cartesian_child('anti_theft', parents=[risk_aversion, socio_econ], value=df.anti_theft) theft = cartesian_child('theft', parents=[car_value, home_base, anti_theft], value=df.theft) own_damage = cartesian_child('own_damage', parents=[ruggedness, accident], value=df.own_damage) own_car_cost = cartesian_child('own_car_cost', parents=[own_damage, car_value, theft], value=df.own_car_cost) cushioning = cartesian_child('cushioning', parents=[ruggedness, accident], value=df.cushioning) medical_cost = cartesian_child('medical_cost', parents=[age, cushioning, accident], value=df.medical_cost) liability_cost = cartesian_child('liability_cost', parents=[accident], value=df.liability_cost) other_car_cost = cartesian_child('other_car_cost', parents=[ruggedness, accident], value=df.other_car_cost) property_cost = cartesian_child('property_cost', parents=[other_car_cost, own_car_cost], value=df.property_cost) model = pymc.Model([ mileage, age, socio_econ, good_student, extra_car, driving_skill, risk_aversion, senior_train, driving_hist, driving_quality, make_model, vehicle_year, antilock, ruggedness, accident, airbag, car_value, home_base, anti_theft, theft, own_damage, own_car_cost, cushioning, medical_cost, liability_cost, other_car_cost, property_cost ]) return model
def get_car_data_random_coeffs(size): mileage = make_bernoulli('mileage', N=1) age = make_bernoulli('age', N=1) socio_econ = cartesian_child('socio_econ', parents=[age], N=1) risk_aversion = cartesian_child('risk_aversion', parents=[age, socio_econ], N=1) senior_train = cartesian_child('senior_train', parents=[age, risk_aversion], N=1) good_student = cartesian_child('good_student', parents=[age, socio_econ], N=1) extra_car = cartesian_child('extra_car', parents=[socio_econ], N=1) driving_skill = cartesian_child('driving_skill', parents=[age, senior_train], N=1) driving_hist = cartesian_child('driving_hist', parents=[driving_skill, risk_aversion], N=1) driving_quality = cartesian_child('driving_quality', parents=[driving_skill, risk_aversion], N=1) make_model = cartesian_child('make_model', parents=[risk_aversion, socio_econ], N=1) vehicle_year = cartesian_child('vehicle_year', parents=[socio_econ], N=1) antilock = cartesian_child('antilock', parents=[make_model, vehicle_year], N=1) ruggedness = cartesian_child('ruggedness', parents=[make_model, antilock], N=1) accident = cartesian_child('accident', parents=[driving_quality, antilock, mileage], N=1) airbag = cartesian_child('airbag', parents=[make_model, vehicle_year], N=1) car_value = cartesian_child('car_value', parents=[make_model, mileage, vehicle_year], N=1) home_base = cartesian_child('home_base', parents=[risk_aversion, socio_econ], N=1) anti_theft = cartesian_child('anti_theft', parents=[risk_aversion, socio_econ], N=1) theft = cartesian_child('theft', parents=[car_value, home_base, anti_theft], N=1) own_damage = cartesian_child('own_damage', parents=[ruggedness, accident], N=1) own_car_cost = cartesian_child('own_car_cost', parents=[own_damage, car_value, theft], N=1) cushioning = cartesian_child('cushioning', parents=[ruggedness, accident], N=1) medical_cost = cartesian_child('medical_cost', parents=[age, cushioning, accident], N=1) liability_cost = cartesian_child('liability_cost', parents=[accident], N=1) other_car_cost = cartesian_child('other_car_cost', parents=[ruggedness, accident], N=1) property_cost = cartesian_child('property_cost', parents=[other_car_cost, own_car_cost], N=1) model = pymc.Model([ mileage, age, socio_econ, good_student, extra_car, driving_skill, risk_aversion, senior_train, driving_hist, driving_quality, make_model, vehicle_year, antilock, ruggedness, accident, airbag, car_value, home_base, anti_theft, theft, own_damage, own_car_cost, cushioning, medical_cost, liability_cost, other_car_cost, property_cost ]) coefficients = sample_coeffs(model) mileage = make_bernoulli('mileage', N=1, fixed=coefficients) age = make_bernoulli('age', N=1, fixed=coefficients) socio_econ = cartesian_child('socio_econ', parents=[age], N=1, fixed=coefficients) risk_aversion = cartesian_child('risk_aversion', parents=[age, socio_econ], N=1, fixed=coefficients) senior_train = cartesian_child('senior_train', parents=[age, risk_aversion], N=1, fixed=coefficients) good_student = cartesian_child('good_student', parents=[age, socio_econ], N=1, fixed=coefficients) extra_car = cartesian_child('extra_car', parents=[socio_econ], N=1, fixed=coefficients) driving_skill = cartesian_child('driving_skill', parents=[age, senior_train], N=1) driving_hist = cartesian_child('driving_hist', parents=[driving_skill, risk_aversion], N=1, fixed=coefficients) driving_quality = cartesian_child('driving_quality', parents=[driving_skill, risk_aversion], N=1, fixed=coefficients) make_model = cartesian_child('make_model', parents=[risk_aversion, socio_econ], N=1, fixed=coefficients) vehicle_year = cartesian_child('vehicle_year', parents=[socio_econ], N=1, fixed=coefficients) antilock = cartesian_child('antilock', parents=[make_model, vehicle_year], N=1, fixed=coefficients) ruggedness = cartesian_child('ruggedness', parents=[make_model, antilock], N=1, fixed=coefficients) accident = cartesian_child('accident', parents=[driving_quality, antilock, mileage], N=1, fixed=coefficients) airbag = cartesian_child('airbag', parents=[make_model, vehicle_year], N=1, fixed=coefficients) car_value = cartesian_child('car_value', parents=[make_model, mileage, vehicle_year], N=1, fixed=coefficients) home_base = cartesian_child('home_base', parents=[risk_aversion, socio_econ], N=1, fixed=coefficients) anti_theft = cartesian_child('anti_theft', parents=[risk_aversion, socio_econ], N=1, fixed=coefficients) theft = cartesian_child('theft', parents=[car_value, home_base, anti_theft], N=1, fixed=coefficients) own_damage = cartesian_child('own_damage', parents=[ruggedness, accident], N=1, fixed=coefficients) own_car_cost = cartesian_child('own_car_cost', parents=[own_damage, car_value, theft], N=1, fixed=coefficients) cushioning = cartesian_child('cushioning', parents=[ruggedness, accident], N=1, fixed=coefficients) medical_cost = cartesian_child('medical_cost', parents=[age, cushioning, accident], N=1, fixed=coefficients) liability_cost = cartesian_child('liability_cost', parents=[accident], N=1, fixed=coefficients) other_car_cost = cartesian_child('other_car_cost', parents=[ruggedness, accident], N=1, fixed=coefficients) property_cost = cartesian_child('property_cost', parents=[other_car_cost, own_car_cost], N=1, fixed=coefficients) model = pymc.Model([ mileage, age, socio_econ, good_student, extra_car, driving_skill, risk_aversion, senior_train, driving_hist, driving_quality, make_model, vehicle_year, antilock, ruggedness, accident, airbag, car_value, home_base, anti_theft, theft, own_damage, own_car_cost, cushioning, medical_cost, liability_cost, other_car_cost, property_cost ]) sampler = pymc.MCMC(model) sampler.sample(iter=size, burn=0) data = pd.DataFrame({ str(node): sampler.trace(str(node))[:].ravel() + 0 for node in sampler.stochastics if not str(node).startswith('CF: ') }) return data
def get_cancer_data_random_coeffs(size): anxiety = make_bernoulli('anxiety', N=1) peer_pressure = make_bernoulli('peer_pressure', N=1) smoking = cartesian_child('smoking', parents=[anxiety, peer_pressure], N=1) yellow_fingers = cartesian_child('yellow_fingers', parents=[smoking], N=1) genetics = make_bernoulli('genetics', N=1) lung_cancer = cartesian_child('lung_cancer', parents=[smoking, genetics], N=1) allergy = make_bernoulli('allergy', N=1) coughing = cartesian_child('coughing', parents=[allergy, lung_cancer], N=1) fatigue = cartesian_child('fatigue', parents=[lung_cancer, coughing], N=1) attention_disorder = cartesian_child('attention_disorder', parents=[genetics], N=1) car_accident = cartesian_child('car_accident', parents=[fatigue, attention_disorder], N=1) model = pymc.Model([ anxiety, peer_pressure, smoking, yellow_fingers, genetics, lung_cancer, allergy, coughing, fatigue, attention_disorder, car_accident ]) coefficients = sample_coeffs(model) anxiety = make_bernoulli('anxiety', N=1, fixed=coefficients) peer_pressure = make_bernoulli('peer_pressure', N=1, fixed=coefficients) smoking = cartesian_child('smoking', parents=[anxiety, peer_pressure], N=1, fixed=coefficients) yellow_fingers = cartesian_child('yellow_fingers', parents=[smoking], N=1, fixed=coefficients) genetics = make_bernoulli('genetics', N=1, fixed=coefficients) lung_cancer = cartesian_child('lung_cancer', parents=[smoking, genetics], N=1, fixed=coefficients) allergy = make_bernoulli('allergy', N=1, fixed=coefficients) coughing = cartesian_child('coughing', parents=[allergy, lung_cancer], N=1, fixed=coefficients) fatigue = cartesian_child('fatigue', parents=[lung_cancer, coughing], N=1, fixed=coefficients) attention_disorder = cartesian_child('attention_disorder', parents=[genetics], N=1, fixed=coefficients) car_accident = cartesian_child('car_accident', parents=[fatigue, attention_disorder], N=1, fixed=coefficients) # sample from the prior model = pymc.Model([ anxiety, peer_pressure, smoking, yellow_fingers, genetics, lung_cancer, allergy, coughing, fatigue, attention_disorder, car_accident ]) sampler = pymc.MCMC(model) sampler.sample(iter=size, burn=0) data = pd.DataFrame({ str(node): sampler.trace(str(node))[:].ravel() + 0 for node in sampler.stochastics }) return data