def _constraintsearch(df, significance_level=0.05, verbose=3):
    """Learn a network structure with the constraint-based (PC) procedure.

    Independence judgements come from ``test_conditional_independence()``,
    which returns a triple ``(chi2, p_value, sufficient_data)``: the computed
    chi2 test statistic, the p-value of the test, and a heuristic flag that
    indicates whether the sample size was sufficient. The p-value is the
    probability of observing the computed chi2 statistic (or an even higher
    chi2 value) given the null hypothesis that X and Y are independent given
    Zs. It is compared against ``significance_level``.

    With a method for independence testing at hand, a DAG is constructed from
    the data set in three steps:

    1. Construct an undirected skeleton - ``estimate_skeleton()``.
    2. Orient compelled edges to obtain a partially directed acyclic graph
       (PDAG; I-equivalence class of DAGs) - ``skeleton_to_pdag()``.
    3. Extend the DAG pattern to a DAG by conservatively orienting the
       remaining edges in some way - ``pdag_to_dag()``.

    Steps 1 and 2 form the so-called PC algorithm, see [2], page 550. PDAGs
    are ``DirectedGraph``s that may contain both-way edges, to indicate that
    the orientation of the edge is not determined.

    PC PDAG construction is only guaranteed to work under the assumption that
    the identified set of independencies is *faithful*, i.e. there exists a
    DAG that exactly corresponds to it. Spurious dependencies in the data set
    can cause the reported independencies to violate faithfulness. It can
    happen that the estimated PDAG does not have any faithful completions
    (i.e. edge orientations that do not introduce new v-structures). In that
    case a warning is issued.

    Parameters
    ----------
    df :
        Data set handed unchanged to ``ConstraintBasedEstimator``
        (presumably a pandas DataFrame - confirm against callers).
    significance_level : float, default 0.05
        Threshold forwarded to ``estimate_skeleton()`` and ``estimate()``.
    verbose : int, default 3
        The intermediate edge lists are printed when ``verbose >= 3``; lower
        values (or ``None``) silence them. The default keeps the output this
        function has always produced.

    Returns
    -------
    dict
        Keys ``undirected``, ``undirected_edges``, ``pdag``, ``pdag_edges``,
        ``dag``, ``dag_edges``, ``model`` and ``model_edges``.
    """
    # Previously `verbose` was accepted but ignored and every print ran
    # unconditionally; honour it while leaving the default call unchanged.
    show = verbose is not None and verbose >= 3

    out = dict()

    # Set search algorithm
    model = ConstraintBasedEstimator(df)

    # Step 1: undirected skeleton, estimated using chi2 tests.
    skel, separating_sets = model.estimate_skeleton(significance_level=significance_level)
    if show:
        print("Undirected edges: ", skel.edges())

    # Step 2: orient compelled edges.
    pdag = model.skeleton_to_pdag(skel, separating_sets)
    if show:
        print("PDAG edges: ", pdag.edges())

    # Step 3: orient the remaining edges.
    dag = model.pdag_to_dag(pdag)
    if show:
        print("DAG edges: ", dag.edges())

    out['undirected'] = skel
    out['undirected_edges'] = skel.edges()
    out['pdag'] = pdag
    out['pdag_edges'] = pdag.edges()
    out['dag'] = dag
    out['dag_edges'] = dag.edges()

    # "estimate()" is a shorthand for the three steps above and directly
    # returns a "BayesianModel".
    best_model = model.estimate(significance_level=significance_level)
    out['model'] = best_model
    out['model_edges'] = best_model.edges()
    if show:
        print(best_model.edges())

    return out
def make_model_admission(data):
    """Run PC on ``data``, print the learned edges, return a fixed network.

    Parameters
    ----------
    data :
        Data set handed unchanged to ``PC``.

    Returns
    -------
    BayesianModel
        A model built from the five hard-coded edges listed below; the
        structure learned from ``data`` is only printed, not returned.
    """
    learned = PC(data).estimate(significance_level=0.05)

    # The learned structure is wrapped in a BayesianModel solely so that its
    # edges can be printed.
    print(BayesianModel(learned.edges).edges)

    # NOTE(review): the result is hard-coded rather than learned, and the
    # ('Gender', 'A') edge looks like a possible typo - confirm intent.
    fixed_edges = [
        ('Extra Curricular', 'Admission'),
        ('Gender', 'Extra Curricular'),
        ('Score', 'Admission'),
        ('Gender', 'Admission'),
        ('Gender', 'A'),
    ]
    return BayesianModel(fixed_edges)
class TimePCAlarmModel:
    """Timing cases for ``PC.estimate`` on data simulated from the 'alarm' model.

    NOTE(review): the ``setup`` / ``time_*`` / ``timeout`` layout looks like
    an asv-style benchmark class - confirm against the benchmark runner.
    """

    # Presumably the per-case time limit (in seconds) read by the runner.
    timeout = 600.0

    def setup(self):
        """Simulate a seeded sample from the 'alarm' model and keep a PC estimator."""
        alarm = get_example_model('alarm')
        self.est = PC(
            alarm.simulate(n_samples=int(1e4), seed=42, show_progress=False)
        )

    def time_pc_stable(self):
        """Run structure estimation with ``variant='stable'``."""
        self.est.estimate(variant='stable')

    def time_pc_orig(self):
        """Run structure estimation with ``variant='orig'``."""
        self.est.estimate(variant='orig')
def predict(self, dataset: DatasetInterface) -> List[Relation]:
    """Learn a graph from ``dataset`` with the configured estimator.

    The estimator is chosen from ``self.algorithm``:

    * ``ESTIMATOR_PC``   -> ``PC``
    * ``ESTIMATOR_MMHC`` -> ``ExhaustiveSearch``
    * anything else      -> ``HillClimbSearch``

    Returns whatever ``PgmpyScript.__build_relations`` produces for the
    estimated graph and the data.
    """
    frame = dataset.get_data()

    if self.algorithm == self.ESTIMATOR_PC:
        learned = PC(frame).estimate(show_progress=False)
    elif self.algorithm == self.ESTIMATOR_MMHC:
        # NOTE(review): the MMHC option is served by ExhaustiveSearch, and
        # show_progress goes to the constructor rather than estimate() -
        # confirm both are intended.
        learned = ExhaustiveSearch(frame, show_progress=False).estimate()
    else:
        learned = HillClimbSearch(frame).estimate(show_progress=False)

    return PgmpyScript.__build_relations(learned, frame)
def call_pgmpy(data):
    """Estimate a structure from ``data`` with PC.

    Uses ``variant='orig'`` and ``max_cond_vars=5`` and returns the result of
    ``PC.estimate`` unchanged.
    """
    return PC(data=data).estimate(variant='orig', max_cond_vars=5)
def setup(self):
    """Prepare ``self.est``: a PC estimator over data simulated from 'alarm'.

    The simulation is seeded (``seed=42``) and uses ``n_samples=10000`` with
    the progress bar switched off.
    """
    simulate_kwargs = dict(n_samples=int(1e4), seed=42, show_progress=False)
    alarm_net = get_example_model('alarm')
    drawn = alarm_net.simulate(**simulate_kwargs)
    self.est = PC(drawn)
# Learn a Bayesian network structure from the heart data set with PC, fit its
# parameters by maximum likelihood, and run one inference query.
#
# The original text had every space replaced by a mis-encoded character and
# all statements fused onto one line, so it could not be parsed; whitespace
# and line breaks are restored here.
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.estimators import PC
from pgmpy.inference import VariableElimination

data = pd.read_csv("/content/sample_data/heart.csv", sep=',')
# The return value is unused here; presumably left over from interactive
# inspection of the first rows.
data.head(6)

# Structure learning: estimate the graph with the PC algorithm.
c = PC(data)
structure = c.estimate()
print(structure.edges())

# Parameter learning on the estimated structure.
model = BayesianModel(structure.edges())
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Inference: query 'cp' and 'target' given the evidence sex=0, exang=1.
infer = VariableElimination(model)
q = infer.query(variables=['cp', 'target'], evidence={'sex': 0, 'exang': 1})
print(q)