Beispiel #1
0
def _constraintsearch(df, significance_level=0.05, verbose=3):
    """Run a constraint-based structure search and collect every intermediate graph.

    The structure is derived twice: once step by step (skeleton -> PDAG -> DAG)
    and once through the ``estimate()`` shorthand of ``ConstraintBasedEstimator``.

    Background: test_conditional_independence() returns a triple
    (chi2, p_value, sufficient_data), consisting of the computed chi2 test
    statistic, the p_value of the test, and a heuristic flag that indicates
    whether the sample size was sufficient. The p_value is the probability of
    observing the computed chi2 statistic (or an even higher chi2 value),
    given the null hypothesis that X and Y are independent given Zs. This can
    be used to make independence judgements at a given level of significance.

    Parameters
    ----------
    df
        Data set handed unchanged to ``ConstraintBasedEstimator``
        (presumably a pandas DataFrame -- confirm against the caller).
    significance_level : float, optional
        Forwarded to ``estimate_skeleton()`` and ``estimate()``. Default 0.05.
    verbose : int, optional
        The edge lists are printed only when ``verbose >= 3``. The default
        of 3 keeps the printing switched on.

    Returns
    -------
    dict
        ``'undirected'``, ``'pdag'``, ``'dag'`` and ``'model'`` hold the graphs
        produced by the estimator; ``'undirected_edges'``, ``'pdag_edges'``,
        ``'dag_edges'`` and ``'model_edges'`` hold the result of calling
        ``edges()`` on the corresponding graph.
    """
    show = verbose >= 3
    out = {}
    # Set search algorithm
    model = ConstraintBasedEstimator(df)

    # DAG (pattern) construction
    # With a method for independence testing at hand, a DAG is constructed
    # from the data set in three steps:
    #   1. Construct an undirected skeleton - `estimate_skeleton()`
    #   2. Orient compelled edges to obtain a partially directed acyclic graph
    #      (PDAG; I-equivalence class of DAGs) - `skeleton_to_pdag()`
    #   3. Extend the DAG pattern to a DAG by conservatively orienting the
    #      remaining edges in some way - `pdag_to_dag()`
    # Steps 1 and 2 form the so-called PC algorithm, see [2], page 550. PDAGs
    # are `DirectedGraph`s that may contain both-way edges, to indicate that
    # the orientation of the edge is not determined.

    # Estimate using chi2
    skel, separating_sets = model.estimate_skeleton(
        significance_level=significance_level)
    skel_edges = skel.edges()
    if show:
        print("Undirected edges: ", skel_edges)

    pdag = model.skeleton_to_pdag(skel, separating_sets)
    pdag_edges = pdag.edges()
    if show:
        print("PDAG edges: ", pdag_edges)

    dag = model.pdag_to_dag(pdag)
    dag_edges = dag.edges()
    if show:
        print("DAG edges: ", dag_edges)

    out['undirected'] = skel
    out['undirected_edges'] = skel_edges
    out['pdag'] = pdag
    out['pdag_edges'] = pdag_edges
    out['dag'] = dag
    out['dag_edges'] = dag_edges

    # "estimate()" is a shorthand for the three steps above and directly
    # returns a "BayesianModel".
    best_model = model.estimate(significance_level=significance_level)
    best_edges = best_model.edges()
    out['model'] = best_model
    out['model_edges'] = best_edges

    if show:
        print(best_edges)

    # PC PDAG construction is only guaranteed to work under the assumption
    # that the identified set of independencies is *faithful*, i.e. there
    # exists a DAG that exactly corresponds to it. Spurious dependencies in
    # the data set can cause the reported independencies to violate
    # faithfulness. It can happen that the estimated PDAG does not have any
    # faithful completions (i.e. edge orientations that do not introduce new
    # v-structures). In that case a warning is issued.
    return out
def make_model_admission(data):
    """Print the PC-learned edges for ``data`` and return a hand-specified network.

    A structure is estimated with ``PC`` at a significance level of 0.05,
    wrapped in a ``BayesianModel`` and its ``edges`` attribute is printed.
    The returned model is NOT the learned one: it is built from the fixed
    edge list below.

    NOTE(review): the learned structure is only printed, never returned --
    confirm this override is intentional. The node 'A' in ('Gender', 'A')
    appears nowhere else in this function; verify it is not a truncated name.
    """
    learned = PC(data).estimate(significance_level=0.05)
    learned_model = BayesianModel(learned.edges)
    print(learned_model.edges)

    manual_edges = [
        ('Extra Curricular', 'Admission'),
        ('Gender', 'Extra Curricular'),
        ('Score', 'Admission'),
        ('Gender', 'Admission'),
        ('Gender', 'A'),
    ]
    return BayesianModel(manual_edges)
class TimePCAlarmModel:
    """Timing harness for PC structure estimation on data simulated from 'alarm'.

    NOTE(review): the ``setup`` / ``time_*`` / ``timeout`` layout looks like
    an asv-style benchmark class -- confirm against the runner in use.
    """

    timeout = 600.0

    def setup(self):
        """Simulate 10,000 samples from the 'alarm' example model and store a PC estimator on ``self.est``."""
        alarm = get_example_model('alarm')
        simulated = alarm.simulate(
            n_samples=10_000,
            seed=42,
            show_progress=False,
        )
        self.est = PC(simulated)

    def time_pc_stable(self):
        """Run one estimation with ``variant='stable'``; the result is discarded."""
        self.est.estimate(variant='stable')

    def time_pc_orig(self):
        """Run one estimation with ``variant='orig'``; the result is discarded."""
        self.est.estimate(variant='orig')
    def predict(self, dataset: DatasetInterface) -> List[Relation]:
        """Estimate a graph from ``dataset`` and convert it to relations.

        The estimator is chosen by ``self.algorithm``:
        ``self.ESTIMATOR_PC`` uses ``PC``, ``self.ESTIMATOR_MMHC`` uses
        ``ExhaustiveSearch``, and any other value uses ``HillClimbSearch``.
        The estimated graph and the raw data are then handed to
        ``PgmpyScript.__build_relations``.

        NOTE(review): the branch named MMHC runs ``ExhaustiveSearch`` and
        passes ``show_progress`` to the constructor rather than to
        ``estimate()`` -- confirm both are intended.
        """
        observations = dataset.get_data()

        if self.algorithm == self.ESTIMATOR_PC:
            learned = PC(observations).estimate(show_progress=False)
        elif self.algorithm == self.ESTIMATOR_MMHC:
            learned = ExhaustiveSearch(observations, show_progress=False).estimate()
        else:
            learned = HillClimbSearch(observations).estimate(show_progress=False)

        return PgmpyScript.__build_relations(learned, observations)
Beispiel #5
0
def call_pgmpy(data):
    """Return the structure estimated by ``PC`` on ``data``.

    The estimation runs with ``variant='orig'`` and ``max_cond_vars=5``.
    """
    return PC(data=data).estimate(variant='orig', max_cond_vars=5)
 def setup(self):
     """Simulate 10,000 samples from the 'alarm' example model and store a PC estimator on ``self.est``."""
     alarm = get_example_model('alarm')
     simulated = alarm.simulate(
         n_samples=10_000,
         seed=42,
         show_progress=False,
     )
     self.est = PC(simulated)
Beispiel #7
0
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.estimators import PC
from pgmpy.inference import VariableElimination

# Load the heart data set from a comma-separated file.
data = pd.read_csv("/content/sample_data/heart.csv", sep=',')
data.head(6)  # result is discarded here; only shows output in an interactive cell

# Learn the network structure with PC and show its edges.
c = PC(data)
structure = c.estimate()
print(structure.edges())

# Build a Bayesian model on the learned edges and fit it with maximum likelihood.
model = BayesianModel(structure.edges())

model.fit(data, estimator=MaximumLikelihoodEstimator)

# Query 'cp' and 'target' given the evidence sex=0 and exang=1.
infer = VariableElimination(model)
q = infer.query(variables=['cp', 'target'], evidence={'sex': 0, 'exang': 1})
print(q)