# Timing benchmarks for pgmpy sampling methods (time_* methods and the timeout
# attribute follow the asv benchmark convention).
from pgmpy.utils import get_example_model
from pgmpy.sampling import BayesianModelSampling, GibbsSampling


class TimeSampling:
    timeout = 600.0

    def setup(self):
        self.model = get_example_model('alarm')
        self.s = BayesianModelSampling(self.model)

    def time_forward_sample(self):
        self.model.simulate(n_samples=int(1e4), show_progress=False)

    def time_rejection_sample(self):
        self.model.simulate(
            n_samples=int(1e4),
            evidence={"HISTORY": "TRUE", "HR": "NORMAL"},
            show_progress=False,
        )

    def time_likelihood_sample(self):
        self.s.likelihood_weighted_sample(
            evidence=[("HISTORY", "TRUE"), ("HR", "NORMAL")], size=int(1e4)
        )

    def time_gibbs_sampling(self):
        gibbs = GibbsSampling(model=self.model)
        gibbs.sample(size=int(1e4))
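# Hedged usage sketch (not part of the benchmark above): exercising the same
# sampler directly, assuming a recent pgmpy release where get_example_model()
# and the show_progress keyword of forward_sample() are available.
from pgmpy.utils import get_example_model
from pgmpy.sampling import BayesianModelSampling

_model = get_example_model('alarm')
_sampler = BayesianModelSampling(_model)

# Plain forward sampling of the full joint distribution.
_df = _sampler.forward_sample(size=1000, show_progress=False)
print(_df.shape)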
def sampling(model, n=1000, verbose=3):
    '''Forward sampling from the joint distribution of the Bayesian network.

    Parameters
    ----------
    model : dict
        Contains model and adjmat.
    n : int
        Number of samples to generate. n=1000 (default).
    verbose : int
        Print messages to screen.
        0: NONE, 1: ERROR, 2: WARNING, 3: INFO (default), 4: DEBUG, 5: TRACE

    Returns
    -------
    pd.DataFrame
    '''
    assert n > 0, 'n must be 1 or larger'
    assert 'BayesianModel' in str(type(model['model'])), (
        'Model must contain DAG from BayesianModel. '
        'Note that the <miserables> example does not include a DAG.')

    # http://pgmpy.org/sampling.html
    inference = BayesianModelSampling(model['model'])
    # inference = GibbsSampling(model)

    # Forward sampling and make dataframe
    df = inference.forward_sample(size=n, return_type='dataframe')
    return df
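# Usage sketch for the sampling() helper above, assuming an older bnlearn/pgmpy
# combination where import_DAG('sprinkler') returns a dict with a 'model' key
# holding a BayesianModel (the structure this function expects; compare the
# docstrings of the bnlearn variants later in this collection).
import bnlearn

dag = bnlearn.import_DAG('sprinkler')
df_sprinkler = sampling(dag, n=1000, verbose=3)
print(df_sprinkler.head())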
def __init__(self, parameters=None):
    super().__init__(parameters)

    # set up the network based on the parameters
    self.model = DBN()
    self.model.add_nodes_from(self.parameters['nodes'])
    self.model.add_edges_from(self.parameters['edges'])
    print(f'EDGES: {sorted(self.model.edges())}')

    # TODO -- add 'evidence' -- get from network?
    cpds = (
        TabularCPD(variable=node_id,
                   variable_card=len(values),
                   values=values,
                   evidence=[])
        for node_id, values in self.parameters['conditional_probabilities'].items())
    # add_cpds expects the CPDs as individual arguments, so unpack the generator
    self.model.add_cpds(*cpds)

    # make an inference instance for sampling the model
    self.inference = BayesianModelSampling(self.model)

    # get a sample
    sample = self.inference.forward_sample(size=2)
def generate_datasets(networks, folder, nb_samples=2000):
    for network in networks:
        dataset_out_path = os.path.join(folder, 'datasets', network + '.csv')
        inference = BayesianModelSampling(networks[network])
        samples = inference.forward_sample(size=nb_samples)
        samples.to_csv(dataset_out_path)
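# Usage sketch for generate_datasets(), assuming pgmpy's bundled example
# models; the 'datasets' sub-folder is created first so that to_csv() succeeds.
import os
from pgmpy.utils import get_example_model

example_networks = {'alarm': get_example_model('alarm'),
                    'asia': get_example_model('asia')}
os.makedirs(os.path.join('output', 'datasets'), exist_ok=True)
generate_datasets(example_networks, folder='output', nb_samples=2000)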
def generate_time_series(
    sampler: BayesianModelSampling,
    length: int,
    labels: typing.List[str],
    seed: int = 42,
):
    # Initialize progress bar
    pbar = notebook.tqdm(total=length)

    # Generate first sample given no evidence
    with io.capture_output() as captured:
        # When no evidence is provided, the function under the hood performs forward sampling
        sample = sampler.rejection_sample(seed=seed)
    sample = sample.reindex(sorted(sample.columns), axis=1)

    # Split the sample into 'current' and 'next' slices:
    # - the 'current' slice will be the first row of the generated time series
    # - the 'next' slice is added as the second row, and will be used as
    #   evidence for subsequent predictions
    df_synth = sample.filter(regex="_T$")
    next_slice = sample.filter(regex=r"_T\+1").iloc[0].values.tolist()
    df_synth = df_synth.append(pd.Series(next_slice, index=df_synth.columns),
                               ignore_index=True)
    evidence = [State(n, v) for n, v in zip(df_synth.columns.values, next_slice)]

    # Update progress bar
    pbar.update(2)

    for _ in range(2, length):
        # Generate new data
        with io.capture_output() as captured:
            sample = sampler.rejection_sample(evidence=evidence)
        sample = sample.reindex(sorted(sample.columns), axis=1)

        # Append the 'next' slice to the generated time series, and use it as new evidence
        next_slice = sample.filter(regex=r"_T\+1").iloc[0].values.tolist()
        df_synth = df_synth.append(pd.Series(next_slice, index=df_synth.columns),
                                   ignore_index=True)
        evidence = [State(n, v) for n, v in zip(df_synth.columns.values, next_slice)]

        # Update progress bar
        pbar.update(1)

    # Close progress bar
    pbar.close()

    # Update column names
    df_synth.columns = labels

    return df_synth
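# Usage sketch for generate_time_series(), assuming `two_slice_model` is a
# (hypothetical) discrete network whose variables follow the "<name>_T" /
# "<name>_T+1" naming convention the regex filters above rely on.
sampler = BayesianModelSampling(two_slice_model)
base_names = sorted({node.rsplit("_T", 1)[0] for node in two_slice_model.nodes()})
df_synth = generate_time_series(sampler, length=100, labels=base_names)
print(df_synth.head())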
def sample(self, nb_sample=1):
    # pgmpy's sampler returns the index of each state value;
    # here we convert that index back to the actual value.
    def convert(samples):
        for col in samples.columns:
            _, states = self.get_state_space(col)
            samples[col] = samples[col].apply(lambda x: states[x])
        return samples

    inference = BayesianModelSampling(self.bn)
    samples = inference.forward_sample(size=nb_sample)
    return convert(samples)
def __init__(self, model, actions, py_func):
    """
    model is a pgmpy.BayesianModel
    actions is a list of (var, value) tuples
    """
    self.py_func = py_func
    self.parents = sorted(model.get_parents('Y'))
    self.N = len(self.parents)
    self.actions = actions
    self.K = len(actions)
    self.observational_model = model
    self.observational_inference = VariableElimination(self.observational_model)
    self.post_action_models = [
        GeneralModel.do(model, action) for action in actions
    ]
    self.samplers = [
        BayesianModelSampling(model_a) for model_a in self.post_action_models
    ]
    self.interventional_distributions = []
    for indx, new_model in enumerate(self.post_action_models):
        infer = VariableElimination(new_model)
        _, distribution_over_parents = infer.query(self.parents)
        self.interventional_distributions.append(distribution_over_parents)
    self.pre_compute()
def infer(self, evidence, new_evidence):
    evidence.update(new_evidence)
    new_model, additional_evidence = self.reduce_model(evidence)

    try:
        if self.inference_type == InferenceType.BeliefPropagation:
            inference = BeliefPropagation(new_model)
        elif self.inference_type == InferenceType.GibbsSampling:
            inference = GibbsSampling(new_model)
        elif self.inference_type == InferenceType.BayesianModelSampler:
            inference = BayesianModelSampling(new_model)
    except Exception as e:
        # for factor in new_model.factors:
        #     print(factor)
        raise e

    self.evidence = {
        var: val for (var, val) in evidence.items() if "F(" not in var
    }
    self.evidence.update(additional_evidence)
    self.inference = inference
    self.scope = get_scope(new_model)
    return new_model
def sample(self, n_samples=1):
    """Sample n data points from the Bayesian network.

    :param n_samples: int, number of data points to generate.
    :return: DataFrame of new data points, shape (n_samples, n_features)
    """
    np.random.seed(self.random_state)
    inference = BayesianModelSampling(self.model)
    Y = inference.forward_sample(size=n_samples, return_type='dataframe')
    Y = Y[sorted(Y.columns)]
    return Y
def setUp(self):
    self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                         ('J', 'L'), ('G', 'L')])
    cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
    cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
    cpd_j = TabularCPD('J', 2,
                       [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                       ['R', 'A'], [2, 2])
    cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
    cpd_l = TabularCPD('L', 2,
                       [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                       ['G', 'J'], [2, 2])
    cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
    self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
    self.sampling_inference = BayesianModelSampling(self.bayesian_model)
    self.markov_model = MarkovModel()
def rejection_estimate(n):
    inferences = BayesianModelSampling(disease_model)
    evidences = [
        State(var='Fatigue', state=0),
        State(var='Fever', state=0),
        State(var='FluShot', state=0)
    ]
    p = inferences.rejection_sample(evidences, n)
    i = 0
    for t in range(n):
        if p['Flu'][t] == float(0):
            i = i + 1
        plt.plot(t, (i / n), 'bo')
    plt.ylabel('Evolving estimate')
    plt.xlabel('Number of samples')
    plt.show()
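# An alternative sketch of the same running estimate, assuming the same
# `disease_model`: draw all rejection samples in one call and plot the
# cumulative relative frequency of Flu == 0 instead of replotting point by point.
import numpy as np
import matplotlib.pyplot as plt
from pgmpy.factors.discrete import State
from pgmpy.sampling import BayesianModelSampling


def rejection_estimate_vectorized(n):
    sampler = BayesianModelSampling(disease_model)
    evidence = [State('Fatigue', 0), State('Fever', 0), State('FluShot', 0)]
    samples = sampler.rejection_sample(evidence=evidence, size=n)
    hits = (samples['Flu'] == 0).to_numpy()
    running_estimate = np.cumsum(hits) / np.arange(1, n + 1)
    plt.plot(running_estimate)
    plt.ylabel('Evolving estimate')
    plt.xlabel('Number of samples')
    plt.show()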
def getDataset(self, size=1000, return_type='DataFrame'):
    """
    Method: return a set of samples generated from the Bayesian network
    (simply using forward sampling).

    Parameters
    ----------
    size: size of the dataset to be generated (default: 1000)
    return_type: return type of the dataset (default: pandas.DataFrame)
    """
    # For more info, see: likelihood_weighted, rejection or Gibbs sampling
    from pgmpy.sampling import BayesianModelSampling
    inference = BayesianModelSampling(self.__covid_model)
    dataset = inference.forward_sample(size=size, return_type=return_type)
    return dataset
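# Usage sketch for getDataset(), assuming a (hypothetical) CovidModel class
# that wraps the __covid_model network and exposes the method above.
covid = CovidModel()
covid_data = covid.getDataset(size=500)
print(covid_data.head())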
def sampling(DAG, n=1000, verbose=3):
    """Generate sample(s) using forward sampling from the joint distribution of the Bayesian network.

    Parameters
    ----------
    DAG : dict
        Contains model and adjmat of the DAG.
    n : int, optional
        Number of samples to generate. The default is 1000.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: None, 1: ERROR, 2: WARN, 3: INFO (default), 4: DEBUG, 5: TRACE

    Returns
    -------
    df : pd.DataFrame
        Dataframe containing sampled data from the input DAG model.

    Example
    -------
    >>> import bnlearn
    >>> DAG = bnlearn.import_DAG('sprinkler')
    >>> df = bnlearn.sampling(DAG, n=1000)

    """
    if n <= 0:
        raise ValueError('n must be 1 or larger')
    if 'BayesianModel' not in str(type(DAG['model'])):
        raise ValueError('DAG must contain BayesianModel.')
    if verbose >= 3:
        print('[bnlearn] >Forward sampling for %.0d samples..' % (n))

    if len(DAG['model'].get_cpds()) == 0:
        print('[bnlearn] >This seems like a DAG containing only edges and no CPDs. '
              'Tip: use bn.parameter_learning.fit(DAG, df) to learn the CPDs first.')
        return

    # http://pgmpy.org/sampling.html
    infer_model = BayesianModelSampling(DAG['model'])
    # inference = GibbsSampling(model['model'])

    # Forward sampling and make dataframe
    df = infer_model.forward_sample(size=n, return_type='dataframe')
    return df
def sample(N):
    bn_generate = BayesianModel([('D', 'G'), ('I', 'G'), ('E', 'L'), ('G', 'L')])
    cpd_d = TabularCPD('D', 2, [[0.6], [0.4]])
    cpd_i = TabularCPD('I', 2, [[0.7], [0.3]])
    cpd_g = TabularCPD('G', 3,
                       [[0.3, 0.9, 0.05, 0.5],
                        [0.4, 0.08, 0.25, 0.3],
                        [0.3, 0.02, 0.7, 0.2]],
                       ['D', 'I'], [2, 2])
    cpd_e = TabularCPD('E', 2, [[0.5], [0.5]])
    cpd_l = TabularCPD('L', 2,
                       [[0.1, 0.3, 0.4, 0.25, 0.8, 0.99],
                        [0.9, 0.7, 0.6, 0.75, 0.2, 0.01]],
                       ['G', 'E'], [3, 2])
    bn_generate.add_cpds(cpd_d, cpd_i, cpd_g, cpd_e, cpd_l)
    infer = BayesianModelSampling(bn_generate)
    data = infer.forward_sample(N)
    return data, bn_generate
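# Usage sketch: draw 1000 forward samples from the student-style network above
# and re-estimate a CPD from the synthetic data, mirroring the
# MaximumLikelihoodEstimator usage elsewhere in this collection.
from pgmpy.estimators import MaximumLikelihoodEstimator

data, bn_true = sample(1000)
mle = MaximumLikelihoodEstimator(model=bn_true, data=data)
print(mle.estimate_cpd(node='G'))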
def sampling(model, n=1000, verbose=3):
    """Sample based on the DAG.

    Parameters
    ----------
    model : dict
        Contains model and adjmat.
    n : int, optional
        Number of samples to generate. The default is 1000.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: NONE, 1: ERROR, 2: WARNING, 3: INFO (default), 4: DEBUG, 5: TRACE

    Returns
    -------
    df : pd.DataFrame

    Example
    -------
    >>> import bnlearn
    >>> model = bnlearn.import_DAG('sprinkler')
    >>> df = bnlearn.sampling(model, n=1000)

    """
    assert n > 0, 'n must be 1 or larger'
    assert 'BayesianModel' in str(type(model['model'])), (
        'Model must contain DAG from BayesianModel. '
        'Note that the <miserables> example does not include a DAG.')
    if verbose >= 3:
        print('[BNLEARN][sampling] Forward sampling for %.0d samples..' % (n))

    # http://pgmpy.org/sampling.html
    inference = BayesianModelSampling(model['model'])
    # inference = GibbsSampling(model)

    # Forward sampling and make dataframe
    df = inference.forward_sample(size=n, return_type='dataframe')
    return df
def sample_slots(model_info_file, mr_slot_names):
    model_info = helpers.load_from_pickle(model_info_file)
    model = model_info['model']

    # use the missing mr slots as evidence
    all_slots = model_info['all_slots']
    missing_slots = [mr for mr in all_slots if mr not in mr_slot_names]
    evidence = [State(mr, 0) for mr in missing_slots]

    inference = BayesianModelSampling(model)

    # don't allow empty samples
    sampled_slots = []
    while sampled_slots == []:
        sample = inference.rejection_sample(evidence=evidence, size=1,
                                            return_type='recarray')
        # return a list of the column names which had presence
        sampled_slots = [
            name for var, name in zip(sample.view('<i8'), sample.dtype.names)
            if var == 1
        ]
    return sampled_slots
def sample_dag(dag, num):
    # zzz this loses disconnected nodes!!!
    # bayesmod = BayesianModel(dag.edges())
    # bayesmod = BayesianModel(dag)
    bayesmod = BayesianModel()
    bayesmod.add_nodes_from(dag.nodes())
    bayesmod.add_edges_from(dag.edges())

    tab_cpds = []
    cards = {node: len(dag.node[node]['cpd']) for node in dag.nodes()}
    for node in dag.nodes():
        parents = dag.predecessors(node)
        cpd = dag.node[node]['cpd']
        if parents:
            parent_cards = [cards[par] for par in parents]
            logging.debug("TabularCPD({}, {}, {}, {}, {})".format(
                node, cards[node], cpd, parents, parent_cards))
            tab_cpds.append(
                TabularCPD(node, cards[node], cpd, parents, parent_cards))
        else:
            logging.debug("TabularCPD({}, {}, {})".format(node, cards[node], cpd))
            tab_cpds.append(TabularCPD(node, cards[node], cpd))

    logging.debug("cpds add: {}".format(tab_cpds))
    print("model variables:", bayesmod.nodes())
    for tab_cpd in tab_cpds:
        print("cpd variables:", tab_cpd.variables)
    bayesmod.add_cpds(*tab_cpds)
    logging.debug("cpds get: {}".format(bayesmod.get_cpds()))

    inference = BayesianModelSampling(bayesmod)
    logging.debug("generating data")
    recs = inference.forward_sample(size=num, return_type='recarray')
    return recs
class DynamicBayesianNetwork(Process):

    defaults = {
        'nodes': [],
        'edges': [],
        'conditional_probabilities': {
            'node_id': []
        }
    }

    def __init__(self, parameters=None):
        super().__init__(parameters)

        # set up the network based on the parameters
        self.model = DBN()
        self.model.add_nodes_from(self.parameters['nodes'])
        self.model.add_edges_from(self.parameters['edges'])
        print(f'EDGES: {sorted(self.model.edges())}')

        # TODO -- add 'evidence' -- get from network?
        cpds = (
            TabularCPD(variable=node_id,
                       variable_card=len(values),
                       values=values,
                       evidence=[])
            for node_id, values in self.parameters['conditional_probabilities'].items())
        # add_cpds expects the CPDs as individual arguments, so unpack the generator
        self.model.add_cpds(*cpds)

        # make an inference instance for sampling the model
        self.inference = BayesianModelSampling(self.model)

        # get a sample
        sample = self.inference.forward_sample(size=2)

    def ports_schema(self):
        return {}

    def next_update(self, timestep, states):
        return {}
cpd_e = TabularCPD(variable='E', variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['M'], evidence_card=[2])

# Associating the CPDs with the network
model.add_cpds(cpd_d, cpd_m, cpd_r, cpd_l, cpd_e)

# check_model checks for the network structure and CPDs and verifies that the
# CPDs are correctly defined and sum to 1.
model.check_model()

# Forward_sample, then iterate and count strong musician / good letter / both
inference = BayesianModelSampling(model)
numSamples = 10000
samples = inference.forward_sample(size=numSamples, return_type='recarray')

part1 = 0
strongLetter = 0
weakMusician = 0
strongLetterWeakMuscician = 0

# Samples have structure (M E D R L)
for sample in samples:
    # P(m = strong) P(d = low) P(r = ** | m = strong, d = low) P(e = high | m = strong) P(letter = weak | **)
    if sample[0] and not sample[2] and sample[3] == 2 and sample[1] and not sample[4]:
        part1 += 1
# P(letter = strong)
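# A sketch of the same empirical count done on a DataFrame instead of a
# recarray, assuming a recent pgmpy where forward_sample() returns a pandas
# DataFrame by default; the condition mirrors the recarray test above
# (M strong, D low, R == **, E high, L weak).
df_samples = BayesianModelSampling(model).forward_sample(size=numSamples)
mask = ((df_samples['M'] == 1) & (df_samples['D'] == 0) & (df_samples['R'] == 2)
        & (df_samples['E'] == 1) & (df_samples['L'] == 0))
print('Empirical joint probability:', mask.mean())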
# Associating the CPDs with the network
pg_model.add_cpds(cpd_parent_edu, cpd_screentime, cpd_physical, cpd_obesity,
                  cpd_self_harm)

# check_model checks for the network structure and CPDs and verifies that the
# CPDs are correctly defined and sum to 1.
pg_model.check_model()

# examine conditional independence relationships:
pg_model.local_independencies("parent_education")
pg_model.local_independencies("child_obesity")
pg_model.local_independencies("child_screen_time")
pg_model.local_independencies("child_physical_activity")

# sample data from the network:
inference = BayesianModelSampling(pg_model)
sim_n = 50_000
simulated_sample = inference.forward_sample(size=sim_n)
for colname_j in simulated_sample.columns:
    simulated_sample[colname_j] = (simulated_sample[colname_j] == "high").astype(int)

# draw correlation plot of the variables:
corr_mat = simulated_sample.corr()
corr_mat.style.background_gradient(cmap="coolwarm").set_precision(2)

# example: if we condition on "child_screen_time"..
# ..then "child_physical_activity" becomes independent of "parent_education":
corr_mat = simulated_sample.query("child_screen_time==1").drop(
    "child_screen_time", axis=1).corr()
corr_mat.style.background_gradient(cmap="coolwarm").set_precision(2)
                           [0.1, 0.2, 1, 1, 0.8, 0.9, 1, 1]],  # p(~G)
                   evidence=['BrokeElectionLaw', 'PoliticallyMotivatedProsecutor',
                             'Indicted'],
                   evidence_card=[2, 2, 2])

cpd_j = TabularCPD(variable='Jailed', variable_card=2,
                   values=[[0.9, 0.0], [0.1, 1.0]],
                   evidence=['FoundGuilty'], evidence_card=[2])

# Associate the CPDs with the nodes
election_model.add_cpds(cpd_b, cpd_i, cpd_m, cpd_g, cpd_j)

# Check the independencies
print(election_model.get_independencies())

samples = BayesianModelSampling(election_model).forward_sample(size=int(1e5))
samples.head()

# Show the estimates
mle = MaximumLikelihoodEstimator(model=election_model, data=samples)
print("\nEstimating the CPD for a single node.\n")
print(mle.estimate_cpd(node='BrokeElectionLaw'))
print(mle.estimate_cpd(node='PoliticallyMotivatedProsecutor'))
print(mle.estimate_cpd(node='Indicted'))
print(mle.estimate_cpd(node='FoundGuilty'))
print(mle.estimate_cpd(node='Jailed'))
model = BayesianModel([('IncomeQ', 'Bedrooms'), ('HhSize', 'Bedrooms'),
                       ('IncomeQ', 'RentQ'), ('Bedrooms', 'RentQ')])
# nx.draw_networkx(model, with_labels=True)

modelData = hh[model.nodes()].copy()
testData = modelData.iloc[int(0.85 * modelData.shape[0]):int(modelData.shape[0])].copy()
trainData = modelData.iloc[0:int(0.85 * modelData.shape[0])].copy()
model.fit(trainData, estimator=MaximumLikelihoodEstimator)

# for cpd in model.get_cpds():
#     print("CPD of {variable}:".format(variable=cpd.variable))
#     print(cpd)

model_sample = BayesianModelSampling(model)
pickle.dump(model_sample, open('results/sampler.p', 'wb'))

# open the nhts sample and add the inferred resType requirements
nhtsSample = pd.read_csv('results/nhtsSample.csv')
resType = []
for ind, row in nhtsSample.iterrows():
    evidence = [
        State('IncomeQ', min(row['hh_income'] - 1, 10)),
        State('HhSize', min(row['hh_size'] - 1, 5))
    ]
    sample = model_sample.likelihood_weighted_sample(evidence=evidence, size=1)
    resType.extend([int(sample['Bedrooms']) * 3 + int(sample['RentQ'])])
nhtsSample['resType'] = resType

os.chdir('..')
nhtsSample[nhtsSample['occupation_type'] == 1].sample(
cpd1 = []
cpd1.append(p_21)
cpd1.append(p_52)
cpd1.append(p_14)
cpd1.append(p_64)
cpd1.append(p_36)
cpd1.append(p4)
model1.add_cpds(*cpd1)

print("------------------------------------------")
print("Edges of model1:", model1.edges())
print("Checking Model1:", model1.check_model())
print("------------------------------------------")

# generate data for model1
inference = BayesianModelSampling(model1)
data = inference.forward_sample(size=3000, return_type='dataframe')
print("Data for model1:")
print(data)

k2 = K2Score(data)
print('Model1 K2 Score: ' + str(k2.score(model1)))

# Inference
from pgmpy.inference import VariableElimination
infer = VariableElimination(model1)
print("Inference of x3:")
print(infer.query(['x3'])['x3'])
print("Inference of x5|x2:")
print(infer.query(['x5'], evidence={'x2': 1})['x5'])
def task3():
    global task4_best_bm, task2_best_bm, task2_best_mm, task4_best_mm

    st1 = time.time()
    task2_best_bm_samples = BayesianModelSampling(task2_best_bm).forward_sample(size=1000)
    et1 = time.time()
    diff1 = et1 - st1

    task2_best_bm_samplesC = task2_best_bm_samples.copy()
    task2_best_bm_samplesC.drop('x1', axis=1, inplace=True)
    task2_bm_predicted = task2_best_bm.predict(task2_best_bm_samplesC)

    task2_best_mm = task2_best_bm.to_markov_model()
    st2 = time.time()
    task2_best_mm_samples = GibbsSampling(task2_best_mm).sample(size=1000)
    et2 = time.time()
    diff2 = et2 - st2

    task2_best_mm_samples_values = task2_best_mm_samples.values
    task2_mm_predicted = []
    task2_mmprop = BeliefPropagation(task2_best_mm)
    for i in range(1000):
        nik_temp = np.array(task2_best_mm_samples_values[i, :])
        try:
            task2_mm_predicted.append(task2_mmprop.map_query(
                variables=['x1'],
                evidence={
                    'x2': int(nik_temp[2]),
                    'x3': int(nik_temp[1]),
                    'x4': int(nik_temp[5]),
                    'x5': int(nik_temp[0]),
                    'x6': int(nik_temp[4])
                }))
        except:
            task2_mm_predicted.append({'x1': -1})

    cnt1 = 0
    cnt2 = 0
    data1 = task2_best_mm_samples[['x1']].as_matrix()
    data2 = task2_best_bm_samples[['x1']].as_matrix()
    for i in range(1000):
        if task2_mm_predicted[i]['x1'] == int(data1[i]):
            cnt1 = cnt1 + 1
        # if task2_bm_predicted[i]['x1'] == int(data2[i]):
        #     cnt2 = cnt2 + 1
    task2_mm_acc = cnt1 / 10.0
    task2_bm_acc = cnt2 / 10.0

    print(" Bayesian Model for 'th' data : " + str(task2_best_bm.edges()))
    print(" Bayesian Model for 'th' data takes time : " + str(diff1))
    # print(" Bayesian Model for 'th' data has accuracy : " + str(task2_mm_acc))
    print(" Markov Model for 'th' data : " + str(task2_best_mm.edges()))
    print(" Markov Model for 'th' data takes time : " + str(diff2))
    print(" Markov Model for 'th' data has accuracy : " + str(task2_mm_acc))

    st3 = time.time()
    task4_best_bm_samples = BayesianModelSampling(task4_best_bm).forward_sample(size=1000)
    et3 = time.time()
    diff3 = et3 - st3

    '''
    task4_best_bm_samplesC = task4_best_bm_samples.copy()
    task4_best_bm_samplesC.drop('f1', axis=1, inplace=True)
    # print(task4_best_bm_samplesC)
    task4_bm_predicted = task4_best_bm.predict(task4_best_bm_samplesC)
    # print(task4_bm_predicted)
    '''

    task4_best_mm = task4_best_bm.to_markov_model()
    st4 = time.time()
    task4_best_mm_samples = GibbsSampling(task4_best_mm).sample(size=1000)
    et4 = time.time()
    diff4 = et4 - st4

    '''
    print(task4_best_mm_samples)
    task4_best_mm_samples_values = task4_best_mm_samples.values
    task4_mm_predicted = []
    task4_mmprop = BeliefPropagation(task4_best_mm)
    for i in range(1000):
        nik_temp = np.array(task4_best_mm_samples_values[i, :])
        print(nik_temp)
        try:
            task4_mm_predicted.append(task4_mmprop.map_query(
                variables=['f1'],
                evidence={
                    'f2': int(nik_temp[2]), 'f3': int(nik_temp[1]),
                    'f4': int(nik_temp[5]), 'f5': int(nik_temp[0]),
                    'f6': int(nik_temp[4]), 'f7': int(nik_temp[2]),
                    'f8': int(nik_temp[1]), 'f9': int(nik_temp[5])
                }))
        except:
            task4_mm_predicted.append({'f1': -1})
    cnt1 = 0
    cnt2 = 0
    data1 = task4_best_mm_samples[['f1']].as_matrix()
    data2 = task4_best_bm_samples[['f1']].as_matrix()
    for i in range(1000):
        if task2_mm_predicted[i]['x1'] == int(data1[i]):
            cnt1 = cnt1 + 1
        if task2_bm_predicted[i]['x1'] == int(data2[i]):
            cnt2 = cnt2 + 1
    task2_mm_acc = cnt1 / 10.0
    task2_bm_acc = cnt2 / 10.0
    '''

    print(" Bayesian Model for 'and' data : " + str(task4_best_bm.edges()))
    print(" Bayesian Model for 'and' data takes time : " + str(diff3))
    # print(" Bayesian Model for 'th' data has accuracy : " + str(task2_mm_acc))
    print(" Markov Model for 'and' data : " + str(task4_best_mm.edges()))
    print(" Markov Model for 'and' data takes time : " + str(diff4))
class TestBayesianModelSampling(unittest.TestCase):
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({0, 1}))
        self.assertTrue(set(sample.J).issubset({0, 1}))
        self.assertTrue(set(sample.R).issubset({0, 1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    def test_rejection_sample_basic(self):
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({1}))
        self.assertTrue(set(sample.J).issubset({1}))
        self.assertTrue(set(sample.R).issubset({1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample", autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 7)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertIn('_weight', sample.columns)
        self.assertTrue(set(sample.A).issubset({0, 1}))
        self.assertTrue(set(sample.J).issubset({0, 1}))
        self.assertTrue(set(sample.R).issubset({0, 1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
def test_init(self):
    with self.assertRaises(TypeError):
        BayesianModelSampling(self.markov_model)
def task4():
    global andRawData, task4_best_bm
    k2Scores = []
    andRawData_temp = pd.DataFrame(
        andRawData.values,
        columns=['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9'])

    # Model 1
    est = HillClimbSearch(andRawData_temp, scoring_method=K2Score(andRawData_temp))
    model_temp = est.estimate()
    estimator = BayesianEstimator(model_temp, andRawData_temp)
    for fx in ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']:
        cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
        model_temp.add_cpds(cpd_fx)
    task4_bms.append(model_temp)
    print(" Model 1: Model through HillClimbSearch is : " + str(model_temp.edges()))
    k2Score = K2Score(BayesianModelSampling(model_temp).forward_sample(size=1000))
    k2Scores_temp = k2Score.score(model_temp)
    k2Scores.append(k2Scores_temp)
    print(" Model 1: K2 Accuracy Score is " + str(k2Scores_temp))

    # Model 2: Manual Model based on HillClimbSearch
    model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'),
                                ('f1', 'f7'), ('f5', 'f3'), ('f9', 'f8'),
                                ('f1', 'f6'), ('f9', 'f1'), ('f9', 'f6'),
                                ('f9', 'f2')])
    estimator = BayesianEstimator(model_temp, andRawData_temp)
    for fx in ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']:
        cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
        model_temp.add_cpds(cpd_fx)
    task4_bms.append(model_temp)
    print(" Model 2: Manual Model based on HillClimbSearch is : " + str(model_temp.edges()))
    k2Score = K2Score(BayesianModelSampling(model_temp).forward_sample(size=1000))
    k2Scores_temp = k2Score.score(model_temp)
    k2Scores.append(k2Scores_temp)
    print(" Model 2: K2 Accuracy Score is " + str(k2Scores_temp))

    # Model 3: Manual Model based on HillClimbSearch
    model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'),
                                ('f5', 'f7'), ('f5', 'f3'), ('f9', 'f8'),
                                ('f1', 'f2'), ('f9', 'f1'), ('f9', 'f6'),
                                ('f9', 'f2')])
    estimator = BayesianEstimator(model_temp, andRawData_temp)
    for fx in ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']:
        cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
        model_temp.add_cpds(cpd_fx)
    task4_bms.append(model_temp)
    print(" Model 3: Manual Model based on HillClimbSearch is : " + str(model_temp.edges()))
    k2Score = K2Score(BayesianModelSampling(model_temp).forward_sample(size=1000))
    k2Scores_temp = k2Score.score(model_temp)
    k2Scores.append(k2Scores_temp)
    print(" Model 3: K2 Accuracy Score is " + str(k2Scores_temp))

    # Model 4: Manual Model based on HillClimbSearch
    model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f5', 'f7'),
                                ('f5', 'f3'), ('f1', 'f2'), ('f9', 'f1'),
                                ('f9', 'f6'), ('f9', 'f8')])
    estimator = BayesianEstimator(model_temp, andRawData_temp)
    for fx in ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']:
        cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
        model_temp.add_cpds(cpd_fx)
    task4_bms.append(model_temp)
    print(" Model 4: Manual Model based on HillClimbSearch is : " + str(model_temp.edges()))
    k2Score = K2Score(BayesianModelSampling(model_temp).forward_sample(size=1000))
    k2Scores_temp = k2Score.score(model_temp)
    k2Scores.append(k2Scores_temp)
    print(" Model 4: K2 Accuracy Score is " + str(k2Scores_temp))

    # Model 5: Manual Model based on Intuition
    model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f4', 'f7'),
                                ('f1', 'f2'), ('f8', 'f5'), ('f9', 'f6'),
                                ('f9', 'f8')])
    estimator = BayesianEstimator(model_temp, andRawData_temp)
    for fx in ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']:
        cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
        model_temp.add_cpds(cpd_fx)
    task4_bms.append(model_temp)
    print(" Model 5: Manual Model based on Intuition is : " + str(model_temp.edges()))
    k2Score = K2Score(BayesianModelSampling(model_temp).forward_sample(size=1000))
    k2Scores_temp = k2Score.score(model_temp)
    k2Scores.append(k2Scores_temp)
    print(" Model 5: K2 Accuracy Score is " + str(k2Scores_temp))

    task4_best_bm = task4_bms[k2Scores.index(max(k2Scores))]
    print(" Best Bayesian Model with the highest accuracy score is thus Model "
          + str(1 + k2Scores.index(max(k2Scores))))
def bayesian_net():
    musicianship_model = BayesianModel([('Difficulty', 'Rating'),
                                        ('Musicianship', 'Rating'),
                                        ('Musicianship', 'Exam'),
                                        ('Rating', 'Letter')])
    cpd_diff = TabularCPD(variable='Difficulty', variable_card=2,
                          values=[[0.6], [0.4]])  # 0->Low, 1->High
    cpd_music = TabularCPD(variable='Musicianship', variable_card=2,
                           values=[[0.7], [0.3]])  # 0->Weak, 1->Strong
    cpd_rating = TabularCPD(variable='Rating', variable_card=3,
                            values=[[0.3, 0.05, 0.9, 0.5],
                                    [0.4, 0.25, 0.08, 0.3],
                                    [0.3, 0.7, 0.02, 0.2]],
                            evidence=['Difficulty', 'Musicianship'],
                            evidence_card=[2, 2])  # 0->*, 1->**, 2->***
    cpd_exam = TabularCPD(variable='Exam', variable_card=2,
                          values=[[0.95, 0.2], [0.05, 0.8]],
                          evidence=['Musicianship'],
                          evidence_card=[2])  # 0->Low, 1->High
    cpd_letter = TabularCPD(variable='Letter', variable_card=2,
                            values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                            evidence=['Rating'],
                            evidence_card=[3])  # 0->Weak, 1->Strong
    musicianship_model.add_cpds(cpd_diff, cpd_music, cpd_rating, cpd_exam,
                                cpd_letter)
    musicianship_model.check_model()

    infer = SimpleInference(musicianship_model)  # query without normalization
    print('------------------------')
    print(' EXACT INFERENCE')
    print('------------------------')
    print('--------------------')
    print(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam: 1 NOT NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'],
                      evidence={('Difficulty', 0), ('Musicianship', 1),
                                ('Rating', 1), ('Exam', 1)}))

    print('--------------------')
    print(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam: 1 NORMALIZED')
    print('--------------------')
    infer = VariableElimination(musicianship_model)  # query normalized
    print(infer.query(['Letter'],
                      evidence={'Difficulty': 0, 'Musicianship': 1,
                                'Rating': 1, 'Exam': 1})['Letter'])

    print('--------------------')
    print(' QUERY Letter with no evidence')
    print('--------------------')
    print(infer.query(['Letter'])['Letter'])

    print('--------------------')
    print(' QUERY Letter with evidence Musicianship: 0 NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'], evidence={'Musicianship': 0})['Letter'])

    sampling = BayesianModelSampling(musicianship_model)
    data = sampling.likelihood_weighted_sample(evidence={}, size=2000,
                                               return_type='dataframe')

    musicianship_model_bis = BayesianModel([('Difficulty', 'Rating'),
                                            ('Musicianship', 'Rating'),
                                            ('Rating', 'Letter'),
                                            ('Musicianship', 'Exam')])
    musicianship_model_bis.fit(data, estimator=BayesianEstimator)
    musicianship_model_bis.check_model()
    infer = VariableElimination(musicianship_model_bis)  # query normalized

    for cpd in musicianship_model_bis.get_cpds():
        print("CPD of {variable}:".format(variable=cpd.variable))
        print(cpd)

    print('------------------------')
    print(' APPROXIMATE INFERENCE')
    print('------------------------')
    print('--------------------')
    print(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam: 1 NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'],
                      evidence={'Difficulty': 0, 'Musicianship': 1,
                                'Rating': 1, 'Exam': 1})['Letter'])

    print('--------------------')
    print(' QUERY Letter with no evidence')
    print('--------------------')
    print(infer.query(['Letter'])['Letter'])

    print('--------------------')
    print(' QUERY Letter with evidence Musicianship: 0 NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'], evidence={'Musicianship': 0})['Letter'])
print(belpro.map_query(variables=['attendance'],
                       evidence={'difficulty': 2, 'Q9': 3}))
# print(belpro.map_query(variables=['Q25', 'Q18', 'Q16'], evidence={'instr': 1}))
print(belpro.map_query(variables=['attendance', 'Q9', 'difficulty'],
                       evidence={'class': 7}))

# Commented out some queries because they take a lot of time to run
# print(belpro.map_query(variables=['Q28', 'Q11'], evidence={'instr': 2, 'class': 10}))
# print(belpro.map_query(variables=['Q18', 'Q26', 'Q13'], evidence={'instr': 2}))
# print(belpro.map_query(variables=['Q23', 'Q21', 'Q17'], evidence={'instr': 2}))

inference = BayesianModelSampling(bayesmodel)
df = inference.forward_sample(5)
# print(df.shape)
print(df)
print(np.mean(df))
# print(scipy.stats.entropy(df))

dataarray = df.to_numpy()  # DataFrame.as_matrix() was removed from pandas
print(dataarray)
arr = dataarray.astype(float)
print(arr)

sum1 = []
total = 0
count = 0
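# A sketch of the entropy computation hinted at in the commented-out line
# above: scipy.stats.entropy expects a probability vector, so compute it per
# column from the empirical value counts of the forward samples (assumes `df`
# is the DataFrame drawn above).
import scipy.stats

for col in df.columns:
    probs = df[col].value_counts(normalize=True).to_numpy()
    print(col, scipy.stats.entropy(probs))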