Example #1
0
    def __init__(self, model, actions, py_func):
        """Prepare observational and post-intervention inference helpers.

        model is a pgmpy.BayesianModel
        actions is a list of (var,value) tuples
        py_func is stored unchanged on the instance
        """
        self.py_func = py_func
        self.parents = sorted(model.get_parents('Y'))
        self.N = len(self.parents)
        self.actions = actions
        self.K = len(actions)

        self.observational_model = model
        self.observational_inference = VariableElimination(model)

        # One intervened model and one sampler per action, in action order.
        intervened_models = []
        for action in actions:
            intervened_models.append(GeneralModel.do(model, action))
        self.post_action_models = intervened_models
        self.samplers = list(map(BayesianModelSampling, intervened_models))

        def parents_distribution(intervened_model):
            # query() is unpacked as a pair; only its second item is kept.
            engine = VariableElimination(intervened_model)
            _, over_parents = engine.query(self.parents)
            return over_parents

        self.interventional_distributions = []
        for intervened_model in self.post_action_models:
            self.interventional_distributions.append(
                parents_distribution(intervened_model))

        self.pre_compute()
Example #2
0
def network_inference(network_model, data):
    """Infer the distribution of every model variable missing from `data`.

    input: pandas Series object holding the observation for one time step;
        its index names the observed variables.
    output: the result of ``VariableElimination.query`` over the missing
        variables, given the observed values as evidence.

    Raises ValueError when `data` already covers every node of the model, or
    when it contains variables that are not nodes of the model.
    """
    # Validate the input before running inference.
    model_nodes = set(network_model.nodes())
    observed = set(data.index)
    if observed == model_nodes:
        raise ValueError("No variable missing in data. Nothing to predict")
    elif observed - model_nodes:
        raise ValueError("Data has variables which are not in the model")

    missing_variables = model_nodes - observed
    # Exact inference by variable elimination.
    model_inference = VariableElimination(network_model)
    states_dict = model_inference.query(variables=missing_variables,
                                        evidence=data.to_dict())
    for k, v in states_dict.items():
        # The last entry of each factor is the one reported as "occurred".
        occurred = v.values[-1]
        if occurred > 0.5:
            print(k, 'probabilily occured: %.3f' % occurred)

    # Return only after the whole loop: previously the return sat in the
    # loop's else-branch, which stopped reporting at the first variable that
    # was <= 0.5 and returned None when every probability was > 0.5.
    return states_dict
Example #3
0
def testCalifornia(true_ny_data, true_ca_data, predicted_ca_data,
                   predicted_ny_data):
    """Print the inferred 'True CA' factor for every sample.

    The four sequences are read at the same index. For each index the
    'True NY', 'Predicted CA' and 'Predicted NY' labels are used as evidence
    and 'True CA' is queried on the module-level ``cities_model``.

    Nothing is returned; all results are printed. The print calls use the
    single-argument function form so the code runs on both Python 2 and 3.
    """
    ca_infer = VariableElimination(cities_model)

    # NOTE: avg_prob is never accumulated (the averaging code is disabled),
    # so the final print always shows 0.0.
    avg_prob = 0.0

    for index, true_ny_label in enumerate(true_ny_data):
        print("True NY label " + str(true_ny_label))
        true_ca_label = true_ca_data[index]
        print("True CA label " + str(true_ca_label))

        predicted_ca_label = predicted_ca_data[index]
        print("Predicted CA label " + str(predicted_ca_label))

        predicted_ny_label = predicted_ny_data[index]
        prob_ca_protest = ca_infer.query(variables=['True CA'],
                                         evidence={
                                             'True NY': true_ny_label,
                                             'Predicted CA':
                                             predicted_ca_label,
                                             'Predicted NY': predicted_ny_label
                                         })
        factor = prob_ca_protest['True CA']
        print(factor)

    print(avg_prob)
Example #4
0
    def Test_Data_Inference_n_steps(self, df_test, n_tsteps):
        """Query `M_t` and its look-ahead variables for every row of `df_test`.

        The queried variables are 'M_t' plus 'M_t+1' ... 'M_t+(n_tsteps-1)'.
        All other columns of a row are used as evidence. For each queried
        variable three columns are written to the result frame: '<var>_0',
        '<var>_1' and '<var>' (the latter via ``Map_Occ_Values``). Rows whose
        evidence fails ``check_data_in_evidence`` get NaN in all three.

        Per-query wall-clock times are collected in ``self.debug_timmer``.
        Returns the result frame (a copy of the COLUMN_SEQUENCE columns of
        `df_test` with the extra columns filled in).
        """
        df_inference_results = df_test.filter(items=COLUMN_SEQUENCE).copy()
        infer = VariableElimination(self.model)

        dict_unique_vals = {
            column: df_test[column].unique() for column in df_test.columns
        }
        result_list = ['M_t']
        if n_tsteps > 1:
            result_list.extend(
                "M_t+{}".format(step) for step in range(1, n_tsteps))
        count = 0
        self.debug_timmer = []

        evidence_columns = [
            column for column in df_test.columns if column not in result_list
        ]
        evidence_by_row = df_test.filter(items=evidence_columns).to_dict('index')

        for index_key, evidence in evidence_by_row.items():
            if check_data_in_evidence(evidence, dict_unique_vals):
                for query_var in result_list:
                    started = time.time()
                    result = infer.query(variables=[query_var],
                                         evidence=evidence)
                    self.debug_timmer.append(time.time() - started)

                    probabilities = result[query_var].values
                    df_inference_results.at[
                        index_key, '{}_0'.format(query_var)] = probabilities[0]
                    df_inference_results.at[
                        index_key, '{}_1'.format(query_var)] = probabilities[1]
                    df_inference_results.at[
                        index_key,
                        '{}'.format(query_var)] = Map_Occ_Values(probabilities[1])
            else:
                # Evidence not usable for this row: mark every output as missing.
                for query_var in result_list:
                    for template in ('{}_0', '{}_1', '{}'):
                        df_inference_results.at[
                            index_key, template.format(query_var)] = np.nan

            count += 1

        logging.info("thermostat {} - Iterations of test {}".format(self.thermostat.tstat_id, count))
        return df_inference_results
Example #5
0
def Inference(model, variables, evidence, treshold):
    """Query `variables` given `evidence` and keep only confident answers.

    Args:
        model: model handed to ``VariableElimination``.
        variables: list of variable names to query, e.g. ``[var3, var4]``.
        evidence: observed states, e.g. ``{var1: label, var2: label}``.
        treshold: a state is reported only when its probability is the
            unique maximum and strictly greater than this value.

    Returns:
        ``(ok, result)``. ``result`` maps each processed variable to
        ``{'value': <index of the most probable state>, 'prob': <its
        probability>}``, or to NaN in both fields when the maximum is tied or
        not above `treshold`. ``ok`` is False when any exception occurred; the
        traceback is then printed and ``result`` holds whatever was filled in
        before the failure.
    """
    result = {}
    try:
        infer = VariableElimination(model)
        inferresult = infer.query(variables=variables, evidence=evidence)
        for var in variables:
            tmpvalue = inferresult[var].values.astype('float32')
            print(tmpvalue)
            result[var] = {}
            top = max(tmpvalue)
            is_unique_max = len(tmpvalue[tmpvalue == top]) == 1
            if is_unique_max and top > treshold:
                result[var] = {'value': np.argmax(tmpvalue), 'prob': top}
            else:
                # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
                result[var] = {'value': np.nan, 'prob': np.nan}
    except Exception:
        # Best-effort API: report failure to the caller instead of raising,
        # but let KeyboardInterrupt/SystemExit propagate.
        print(traceback.format_exc())
        return False, result
    return True, result
Example #6
0
def inference(model, variables=None, evidence=None, verbose=3):
    '''
    Answer a conditional-probability query with Variable Elimination.

    Inference means asking conditional probability questions of a model,
    e.g. "what is the probability that the sprinkler is on given that it is
    raining". Exact inference algorithms (such as Variable Elimination and
    Clique Tree Belief Propagation) return exact probabilities; approximate
    algorithms trade accuracy for less computation.

    Variable elimination is marginalization over the joint distribution
    without ever building the full joint: to eliminate $ X $, only the
    factors that involve $ X $ are multiplied together and summed over, so
    the work is done on much smaller factors.

    Parameters
    ----------
    model : mapping
        The pgmpy model is read from ``model['model']``.
    variables : optional
        Passed through as ``variables`` to the query.
    evidence : optional
        Passed through as ``evidence`` to the query.
    verbose : int
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    The query result, which is also printed.
    '''
    engine = VariableElimination(model['model'])
    answer = engine.query(variables=variables, evidence=evidence)
    print(answer)
    return answer
Example #7
0
    def inference(self, variables, evidence, mode="auto", log=True):
        ''' (6)
        Computes the inference over some variables of the network (given some evidence)

        A header is logged and written through ``self.file_writer`` first.

        mode == "auto": the `variables` and `evidence` arguments are
            overwritten; every node of ``self.best_model`` is queried with
            all of its parents fixed to value 1, and each resulting entry is
            written to the file writer and logged.
        mode == "manual": `variables` is queried given `evidence` and each
            resulting entry is logged (not written to the file writer).

        `log` is forwarded as the second argument of ``self.log``.
        '''

        # Variable elimination is the engine in use; the two commented lines
        # below are alternative engines that are currently disabled.
        inference = VariableElimination(self.best_model)
        #inference = BeliefPropagation(self.markov)
        #inference = Mplp(self.best_model)
        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        # NOTE(review): this line is written for every mode, although the
        # "parents set to 1" evidence is only built in "auto" mode.
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                # Query one node at a time, conditioning on all its parents = 1.
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                # The result is iterated by key and indexed like a mapping.
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)

        elif mode == "manual":
            # Use the caller-supplied variables and evidence unchanged.
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
            '''
Example #8
0
    def inf(self, file1):
        """Build a Bayesian network from a text file and print its posteriors.

        File layout, as read by this method:
          * one five-line record per node: node name, number of values,
            CPD string, parent list, parent cardinalities;
          * a line containing only a newline ends the node records;
          * the line after that separator holds the edges, and the line two
            further down holds the evidence.

        If the assembled model passes ``check_model()``, every node that is
        not part of the evidence is queried with variable elimination and
        one line ``"<node> <values>"`` per node is printed. Nothing is
        returned.
        """
        # Read everything up front so the file is closed before any parsing
        # or inference (the handle used to be left open).
        with open(file1, encoding="utf8") as f1:
            lines = f1.readlines()

        G = BayesianModel()
        nodeList = {}
        i = 0
        while i < len(lines):
            if lines[i] == '\n':
                break
            nodeName = self.getnode(lines[i])
            valueNum = int(lines[i + 1])
            cpd_str = lines[i + 2]
            sequence = self.getList(lines[i + 3])
            card = self.getCard(lines[i + 4])
            nodeList[nodeName] = {
                'nodeName': nodeName,
                'valueNum': valueNum,
                'cpd': self.parseCpd(cpd_str, valueNum, card),
                'sequence': sequence,
                'card': card,
            }
            i += 5
        # `i` now indexes the separator line.
        edges = self.getegdes(lines[i + 1])
        evidence2 = self.getValue(lines[i + 3])

        # `edges` is a flat list: [from0, to0, from1, to1, ...].
        for pair in range(len(edges) // 2):
            G.add_edge(edges[2 * pair], edges[2 * pair + 1])

        for node in nodeList.values():
            if node['sequence'][0] == '':
                # No parents: unconditional table.
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 values=node['cpd'])
            else:
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 evidence=node['sequence'],
                                 evidence_card=node['card'],
                                 values=node['cpd'])
            G.add_cpds(cpt)

        if G.check_model():
            inference = VariableElimination(G)
            report = []
            for node in G.nodes():
                if node not in evidence2:
                    phi_query = inference.query(variables=[node],
                                                evidence=evidence2,
                                                show_progress=False).values
                    report.append(node + ' ' + str(phi_query) + '\n')
            print(''.join(report))
Example #9
0
    def print_beliefs(self):
        """Print the current belief over suspects, weapons and rooms.

        For each of the three models the queried distribution is printed,
        followed by the matching ``*_in_order`` list.
        """

        def show_distribution(model, variable):
            engine = VariableElimination(model)
            print(engine.query(variables=[variable], show_progress=False))

        show_distribution(self._suspects_model, 's')
        print(self._suspects_in_order)

        show_distribution(self._weapons_model, 'w')
        print(self._weapons_in_order)

        show_distribution(self._rooms_model, 'r')
        print(self._rooms_in_order)
Example #10
0
        def process_segments(size):
            """Refit the model on consecutive data segments and merge results.

            The data is processed in `loop` segments of `size` rows (the last
            one may be shorter). After each fit the three time-frame queries
            are run. Afterwards, for every node, the per-segment entries of
            the global ``Distribution[node]`` are combined into one list:
            all but the last entry are summed as-is, the last entry is scaled
            by ``last_size / size``, and the total is multiplied by
            ``size / self.data_size``.
            """
            global Distribution
            # Number of segments, rounding up so a partial tail is included.
            loop = int(np.ceil(float(self.data_size) / size))
            # Row count of the final (possibly shorter) segment.
            last_size = self.data_size - size * (loop - 1)
            print('size: ', size, ' | last_size ', last_size, ' | loop: ',
                  loop)
            for i in range(loop):
                print('process: ', i)
                # NOTE(review): `.loc` slicing is end-inclusive for label
                # indexes, so consecutive segments may share a boundary row
                # -- confirm against the index of self.data.
                self.model.fit(self.data.loc[i * size:(i + 1) * size],
                               estimator_type=BayesianEstimator,
                               prior_type="BDeu",
                               equivalent_sample_size=1,
                               state_names=self.state_names)
                infer = VariableElimination(self.model)
                # Presumably each query appends one entry per node to
                # Distribution -- defined outside this view, TODO confirm.
                query_time_frame_1(infer)
                query_time_frame_2(infer)
                query_time_frame_3(infer)

            for node in list(nodes + nodes2 + nodes3):
                # Accumulator with one slot per state of this node.
                temp = [0] * len(self.state_names[node])
                length_distribution = len(Distribution[node])
                length_state_name = len(self.state_names[node])
                # Full-size segments (every entry except the last) count fully.
                for distribution_index in range(0, length_distribution - 1):
                    for value_distr_index in range(length_state_name):
                        temp[value_distr_index] += Distribution[node][
                            distribution_index][value_distr_index]
                # The last segment is weighted by its size relative to `size`.
                percent = float(last_size) / size
                for value_distr_index in range(length_state_name):
                    temp[value_distr_index] += (
                        Distribution[node][-1][value_distr_index] * percent)
                # Replace the per-segment list with the size-weighted average.
                Distribution[node] = [x * size / self.data_size for x in temp]
Example #11
0
def main():
    """Build the three-door game-show network and print the host's posterior.

    Variables (three states each):
        H: host
        P: prize
        C: contestant

    Prints the distribution of H given the evidence C = 0 and P = 0.
    """
    # C and P are the two parents of H.
    model = BayesianModel([('C', 'H'), ('P', 'H')])

    # Uniform priors. Each table has one row per state and uses exactly 1/3
    # so that it sums to one (0.33 per state only summed to 0.99).
    third = 1.0 / 3
    cpd_c = TabularCPD('C', 3, [[third], [third], [third]])
    cpd_p = TabularCPD('P', 3, [[third], [third], [third]])

    # Rows are the states of H; columns enumerate (C, P) with P varying
    # fastest: (0,0), (0,1), (0,2), (1,0), ...
    cpd_h = TabularCPD('H',
                       3, [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                           [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                           [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]],
                       evidence=['C', 'P'],
                       evidence_card=[3, 3])

    # Associating the CPDs with the network structure.
    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # Inferring the posterior probability of H.
    infer = VariableElimination(model)
    posterior_p = infer.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior_p['H'])
Example #12
0
    def hypothesis_update(self, node, prediction_error, prediction):
        """
        Resolve a prediction error, either by acting or by revising hypotheses.

        Nodes whose name contains "motor" are handled by delegating to
        ``self.sensory_input.action``. For any other node, every root of the
        model is re-queried given the evidence
        ``{node: argmax(prediction_error + prediction)}`` and the root's CPD
        values are overwritten with the queried values.

        :param node: name of the node causing the prediction error
        :param prediction_error: the prediction error itself
        :param prediction: prediction causing the prediction error

        :type node : str
        :type prediction_error: np.array
        :type prediction: np.array
        """
        # In theory one hypothesis update covers both perceptual and motor
        # updates; this implementation keeps the two cases explicit.
        # TODO: Need to have custom implementation of bayesian network, so that prediction errors in proprioceptive
        # TODO: nodes (motor) are resolved by executing the motor action, and not performing hypo update
        infer = VariableElimination(self.model)

        if "motor" in node:
            self.sensory_input.action(node, prediction_error, prediction)
            return

        for hypo in self.model.get_roots():
            observed = {node: np.argmax(prediction_error + prediction)}
            result = infer.query(variables=[hypo], evidence=observed)
            before = self.model.get_cpds(hypo).values
            self.model.get_cpds(hypo).values = result.get(hypo).values
            logging.debug("node[%s] hypothesis-update from %s to %s", hypo,
                          before,
                          result.get(hypo).values)
Example #13
0
    def setUp(self):
        """Create the Markov-model fixture and its VariableElimination engine.

        The factors are obtained by converting tabular CPDs over the binary
        variables A, R, J, Q, L and G with ``to_factor()``.
        """
        # This is a moralised version of the corresponding Bayesian network,
        # so all results are the same. Only the factors matter for inference,
        # so this should be fine.
        self.markov_model = MarkovModel([
            ('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L'),
            ('A', 'R'), ('J', 'G'),
        ])

        cpds = [
            TabularCPD('A', 2, values=[[0.2], [0.8]]),
            TabularCPD('R', 2, values=[[0.4], [0.6]]),
            TabularCPD('J', 2,
                       values=[[0.9, 0.6, 0.7, 0.1],
                               [0.1, 0.4, 0.3, 0.9]],
                       evidence=['A', 'R'],
                       evidence_card=[2, 2]),
            TabularCPD('Q', 2,
                       values=[[0.9, 0.2], [0.1, 0.8]],
                       evidence=['J'],
                       evidence_card=[2]),
            TabularCPD('L', 2,
                       values=[[0.9, 0.45, 0.8, 0.1],
                               [0.1, 0.55, 0.2, 0.9]],
                       evidence=['J', 'G'],
                       evidence_card=[2, 2]),
            TabularCPD('G', 2, [[0.6], [0.4]]),
        ]
        factors = [cpd.to_factor() for cpd in cpds]

        self.markov_model.add_factors(*factors)
        self.markov_inference = VariableElimination(self.markov_model)
Example #14
0
	def update_cpts_causal_model(self):
		"""
		Build the CPTs with the pgmpy library from the agent's beliefs
		dictionary and attach them to the underlying pgmpy model.

		When the resulting model passes ``check_model()`` a new
		VariableElimination engine is stored in ``self.model.infer_system``.
		Otherwise the CPDs of a copy taken before the update are logged and
		ValueError is raised.
		"""
		adj_list = self.model.get_nodes_and_predecessors()
		logging.info("Updating cpts from beliefs")
		var_values = {}
		for name in adj_list:
			var_values[name] = self.model.get_variable_values(name)
		backup_model = self.model.pgmodel.copy()

		for variable in self.beliefs:
			parents = adj_list[variable]
			parent_cards = [len(var_values[parent]) for parent in parents]
			cpd_table = TabularCPD(
				variable=variable,
				variable_card=len(var_values[variable]),
				values=self.beliefs[variable],
				evidence=parents,
				evidence_card=parent_cards)
			self.model.pgmodel.add_cpds(cpd_table)

		if not self.model.pgmodel.check_model():
			# Log the pre-update CPDs to help diagnose the invalid tables.
			for cpd in backup_model.get_cpds():
				logging.info(cpd)
			raise ValueError("Error with CPTs")

		self.model.infer_system = VariableElimination(self.model.pgmodel)
Example #15
0
    def setUp(self):
        """Create the Bayesian-model fixture and its VariableElimination engine.

        All six variables (A, R, J, Q, L, G) are binary.
        """

        def binary_cpd(variable, values, evidence=None):
            # Every variable has two states, so each parent contributes 2.
            if evidence is None:
                return TabularCPD(variable, 2, values=values)
            return TabularCPD(variable,
                              2,
                              values=values,
                              evidence=evidence,
                              evidence_card=[2] * len(evidence))

        self.bayesian_model = BayesianModel([
            ('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L'),
        ])

        cpd_a = binary_cpd('A', [[0.2], [0.8]])
        cpd_r = binary_cpd('R', [[0.4], [0.6]])
        cpd_j = binary_cpd('J',
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'])
        cpd_q = binary_cpd('Q', [[0.9, 0.2], [0.1, 0.8]], evidence=['J'])
        cpd_l = binary_cpd('L',
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'])
        cpd_g = binary_cpd('G', [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)
Example #16
0
 def get_most_probable_weapon(self):
     """
     Return the entry of ``self._weapons_in_order`` whose position matches
     the highest value in the queried distribution of 'w' (first one on ties).
     """
     engine = VariableElimination(self._weapons_model)
     belief = engine.query(variables=['w'], show_progress=False).values
     best_position = list(belief).index(max(belief))
     return self._weapons_in_order[best_position]
Example #17
0
 def get_most_probable_suspect(self):
     """
     Return the entry of ``self._suspects_in_order`` whose position matches
     the highest value in the queried distribution of 's' (first one on ties).
     """
     engine = VariableElimination(self._suspects_model)
     belief = engine.query(variables=['s'], show_progress=False).values
     best_position = list(belief).index(max(belief))
     return self._suspects_in_order[best_position]
    def exact_inference(self, filename):
        """Answer the query stored on the first line of `filename`.

        The first line is parsed into query variables and evidence variables
        (both mappings from variable name to a state label). Evidence labels
        are translated through ``self.mapper`` and the query is run with
        variable elimination on ``self.networks``.

        Returns the single entry of the resulting factor that corresponds to
        the requested state of every query variable.
        """
        # Only the first line is needed, so read it and close the file before
        # running inference; the context manager also closes it on errors.
        with open(filename, 'r') as f:
            query_line = f.readline()
        query_variables, evidence_variables = self.__extract_query(query_line)

        eliminate = VariableElimination(self.networks)

        evidence_variables_mapped = {
            variable: self.mapper[variable][evidence_variables[variable]]
            for variable in evidence_variables
        }

        result = eliminate.query(variables=list(query_variables.keys()),
                                 evidence=evidence_variables_mapped)

        # Index the value array one axis at a time, following the factor's
        # own variable order, until a single entry remains.
        value = result.values
        for feature in result.variables:
            value = value[result.get_state_no(
                feature, self.mapper[feature][query_variables[feature]])]

        return value
Example #19
0
def probnet_inference(model, h, b, d):
    """Binarise the three readings and return the MAP assignment of 'S'.

    Each reading becomes 1 when it exceeds its cut-off (h > 10, b > 20,
    d > 3) and 0 otherwise. The three flags are printed, then used as
    evidence for 'H', 'B' and 'D' in a ``map_query`` on 'S'.
    """
    evidence = {
        'H': 1 if h > 10 else 0,
        'B': 1 if b > 20 else 0,
        'D': 1 if d > 3 else 0,
    }
    print(evidence['H'], evidence['B'], evidence['D'])
    engine = VariableElimination(model)
    return engine.map_query(['S'], evidence=evidence)
Example #20
0
def mutual_information(self, X, Y, evidence):
    """Return the mutual information between X and Y given `evidence`.

    The joint distribution of X and Y is queried with variable elimination
    on ``self.model_pgmpy``; the result is the sum over all cells of
    ``p(x, y) * log(p(x, y) / (p(x) * p(y)))`` using the natural logarithm.

    Args:
        X, Y: names of the two variables.
        evidence: mapping of observed variables to states; may be empty or
            None.

    Returns:
        -1 when X or Y is itself part of the evidence (the quantity is not
        meaningful then), otherwise the mutual information.
    """
    # If one of the variables is already observed there is nothing to compute.
    if evidence and (X in evidence or Y in evidence):
        return -1

    from math import log

    # set up inference using variable elimination algorithm
    from pgmpy.inference import VariableElimination

    model_infer = VariableElimination(self.model_pgmpy)

    # calculate joint distribution
    joint = model_infer.query(variables=[X, Y], evidence=evidence, joint=True)

    # calculate marginals from joint
    Y_mar = joint.marginalize([X], inplace=False).values
    X_mar = joint.marginalize([Y], inplace=False).values

    # The joint table may come back with either variable first; make sure Y
    # is on the rows so it lines up with the loops below.
    if joint.variables[0] != Y:
        XY_joint = np.transpose(joint.values)
    else:
        XY_joint = joint.values

    mutual_info = 0
    for i, p_y in enumerate(Y_mar):
        for j, p_x in enumerate(X_mar):
            p_xy = XY_joint[i, j]
            # In information theory 0*log(0) = 0, so empty cells are skipped.
            # Testing explicitly (instead of catching ValueError) also covers
            # a zero marginal, where 0/0 would otherwise turn the sum to NaN.
            if p_xy > 0:
                mutual_info += p_xy * log(p_xy / (p_y * p_x))

    return mutual_info
Example #21
0
def buildBN():
    """Build the burglary/earthquake alarm network and return its inference engine.

    All variables are binary; state 0 is "false" and state 1 is "true".
    The returned object is a ``VariableElimination`` instance for exact
    inference on the model.
    """
    edges = [
        ('Burglary', 'Alarm'),
        ('Earthquake', 'Alarm'),
        ("Alarm", "JohnCalls"),
        ("Alarm", "MaryCalls"),
    ]
    burglary_model = BayesianModel(edges)

    # Priors: [ P(!B), P(B) ] and [ P(!E), P(E) ]
    cpd_burg = TabularCPD(variable='Burglary',
                          variable_card=2,
                          values=[[.999], [.001]])
    cpd_earth = TabularCPD(variable='Earthquake',
                           variable_card=2,
                           values=[[.998], [.002]])

    # Columns: (!E,!B), (!E,B), (E,!B), (E,B)
    alarm_silent = [.999, .06, .71, .05]  # P(!A | E, B)
    alarm_rings = [.001, .94, .29, .95]  # P(A | E, B)
    cpd_alarm = TabularCPD(variable='Alarm',
                           variable_card=2,
                           values=[alarm_silent, alarm_rings],
                           evidence=['Earthquake', 'Burglary'],
                           evidence_card=[2, 2])

    # Columns: !A, A. First row is "does not call", second row is "calls".
    cpd_john = TabularCPD(variable="JohnCalls",
                          variable_card=2,
                          values=[[.95, .10], [.05, .90]],
                          evidence=['Alarm'],
                          evidence_card=[2])
    cpd_mary = TabularCPD(variable="MaryCalls",
                          variable_card=2,
                          values=[[.99, .30], [.01, .70]],
                          evidence=['Alarm'],
                          evidence_card=[2])

    burglary_model.add_cpds(cpd_burg, cpd_earth, cpd_alarm, cpd_john, cpd_mary)

    # Exact inference using Variable Elimination.
    return VariableElimination(burglary_model)
Example #22
0
def main_brute(file, keep_atts, edges):
    """Brute-force the elimination ordering with the smallest tree width.

    ``find_widths_brute`` supplies one width per permutation of `keep_atts`;
    the permutation with the minimum width is used to build the induced
    graph of the network's Markov model. Summary statistics are printed.

    Returns:
        (graph_size, density, n_edges, worst_runtime, total_sat_runtime)
    """
    perms = list(itertools.permutations(keep_atts))

    (max_widths, cliques, dictionaries, sat_runtimes,
     total_sat_runtime) = find_widths_brute(file, keep_atts, edges)

    min_tree_width_idx = np.argmin(max_widths)
    min_tree_width = np.min(max_widths)
    best_ordering = perms[min_tree_width_idx]
    # Looked up alongside the best ordering; not used further below.
    best_clique = cliques[min_tree_width_idx]
    best_dictionary = dictionaries[min_tree_width_idx]

    bayes_net = create_bayes_net(file, keep_atts, edges)
    engine = VariableElimination(bayes_net.to_markov_model())

    # Induced graph for the best elimination ordering.
    induced = engine.induced_graph(list(best_ordering))
    graph_size = len(keep_atts)
    density = nx.density(induced)
    n_edges = induced.number_of_edges()
    worst_runtime = np.max(sat_runtimes)

    report = (
        ("Graph size ", graph_size),
        ("Graph density ", density),
        ("Graph edges ", n_edges),
        ("The minimum tree width is ", min_tree_width),
        ("The best ordering is ", best_ordering),
        ("The worst case SAT runtime is ", worst_runtime),
        ("The total SAT runtime is ", total_sat_runtime),
    )
    for label, value in report:
        print(label, value)

    return graph_size, density, n_edges, worst_runtime, total_sat_runtime
Example #23
0
    def setUp(self):
        """Build the binary A/R/J/Q/L/G Bayesian network used by the tests.

        Stores the model in ``self.bayesian_model`` and a VariableElimination
        engine for it in ``self.bayesian_inference``.
        """
        self.bayesian_model = BayesianModel([
            ("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"), ("G", "L"),
        ])

        # Listed in the order in which they are attached to the model.
        cpds = [
            TabularCPD("A", 2, values=[[0.2], [0.8]]),
            TabularCPD("G", 2, values=[[0.6], [0.4]]),
            TabularCPD("J", 2,
                       values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                       evidence=["A", "R"],
                       evidence_card=[2, 2]),
            TabularCPD("L", 2,
                       values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                       evidence=["J", "G"],
                       evidence_card=[2, 2]),
            TabularCPD("Q", 2,
                       values=[[0.9, 0.2], [0.1, 0.8]],
                       evidence=["J"],
                       evidence_card=[2]),
            TabularCPD("R", 2, values=[[0.4], [0.6]]),
        ]
        self.bayesian_model.add_cpds(*cpds)

        self.bayesian_inference = VariableElimination(self.bayesian_model)
Example #24
0
    def predict(self, data):
        """
        Predict the most probable state of every variable absent from `data`.

        Each row of `data` is used as evidence for a MAP query over the
        model variables that have no column in `data`.

        Parameters
        ----------
        data : pandas DataFrame object
            A DataFrame object with column names same as the variables in the model.

        Returns
        -------
        pandas DataFrame with one column per missing variable and the same
        index as `data`.

        Raises
        ------
        ValueError
            If `data` has a column for every model variable, or has columns
            that are not variables of the model.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> train_data = values[:800]
        >>> predict_data = values[800:]
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> predict_data = predict_data.copy()
        >>> predict_data.drop('E', axis=1, inplace=True)
        >>> y_pred = model.predict(predict_data)
        >>> y_pred
            E
        800 0
        801 1
        802 1
        803 1
        804 0
        ... ...
        993 0
        994 0
        995 1
        996 1
        997 0
        998 0
        999 0
        """
        from pgmpy.inference import VariableElimination

        model_variables = set(self.nodes())
        observed_variables = set(data.columns)

        if observed_variables == model_variables:
            raise ValueError("No variable missing in data. Nothing to predict")
        if observed_variables - model_variables:
            raise ValueError("Data has variables which are not in the model")

        missing_variables = model_variables - observed_variables
        pred_values = defaultdict(list)

        # The state_names dict of one estimated CPD is handed to the
        # inference class.
        state_names = self.get_cpds()[0].state_names
        model_inference = VariableElimination(self, state_names=state_names)

        for _, data_point in data.iterrows():
            most_probable = model_inference.map_query(
                variables=missing_variables, evidence=data_point.to_dict())
            for variable, state in most_probable.items():
                pred_values[variable].append(state)

        return pd.DataFrame(pred_values, index=data.index)
Example #25
0
    def make_accusation(self):
        """Return a (suspect, weapon, room) accusation once beliefs are sharp.

        A previously stored ``self._accusation`` is returned as-is. Otherwise
        the three distributions are queried, and the most probable entry of
        each is returned only if the entropy of every distribution is at or
        below ``self._accusation_entropy_threshold``. In any other case the
        method returns None.
        """
        if self._accusation:
            return self._accusation

        def belief(model, variable):
            engine = VariableElimination(model)
            return engine.query(variables=[variable], show_progress=False)

        s_dis = belief(self._suspects_model, 's')
        w_dis = belief(self._weapons_model, 'w')
        r_dis = belief(self._rooms_model, 'r')

        def most_probable(names, distribution):
            probabilities = list(distribution.values)
            return names[probabilities.index(max(probabilities))]

        # Evaluated lazily, so checking stops at the first distribution that
        # is still too uncertain.
        confident = all(
            entropy(distribution.values) <= self._accusation_entropy_threshold
            for distribution in (s_dis, w_dis, r_dis))
        if confident:
            return (most_probable(self._suspects_in_order, s_dis),
                    most_probable(self._weapons_in_order, w_dis),
                    most_probable(self._rooms_in_order, r_dis))
        return None
 def category_probability(self, category: str) -> float:
     '''Return the probability of the given category.

     The marginal of ``Data.CATEGORY_NAME`` is queried on ``self.model``.
     `category` is translated to a state name through ``self.categories``
     (falling back to 0 when it is unknown), and the probability stored at
     that state's index is returned.
     '''
     # The former lookup in Data.CATEGORY_VALUES was removed: its result was
     # never used.
     elimination = VariableElimination(self.model)
     probability = elimination.query(variables=[Data.CATEGORY_NAME])
     state_name = self.categories.get(category, 0)
     state = probability.get_state_no(Data.CATEGORY_NAME, state_name)
     return probability.values[state]
Example #27
0
def get_alarm_prob(bayes_net):
    """Return the marginal probability of the alarm ringing in the power plant system.

    Runs variable elimination on ``bayes_net`` for the ``"alarm"`` node with
    no evidence and returns the entry at index 1 of that node's marginal.
    """
    engine = VariableElimination(bayes_net)
    marginals = engine.query(variables=["alarm"], joint=False)
    alarm_marginal = marginals["alarm"]
    return alarm_marginal.values[1]
Example #28
0
def get_gauge_prob(bayes_net):
    """Return the marginal probability of the gauge showing hot in the power plant system.

    Runs variable elimination on ``bayes_net`` for the ``"gauge"`` node with
    no evidence and returns the entry at index 1 of that node's marginal.
    """
    engine = VariableElimination(bayes_net)
    marginals = engine.query(variables=["gauge"], joint=False)
    gauge_marginal = marginals["gauge"]
    return gauge_marginal.values[1]
Example #29
0
def MH_sampler(bayes_net, initial_state):
    """Run one Metropolis-Hastings sweep and return the resulting state.

    initial_state is a list of length 6 where:
    index 0-2: represent skills of teams A,B,C (values lie in [0,3] inclusive)
    index 3-5: represent results of matches AvB, BvC, CvA (values lie in [0,2] inclusive)

    Positions are matched to variables through the order of
    ``bayes_net.nodes()``.

    If ``initial_state`` is empty or None, no sweep is performed: a random
    starting state is returned with random skills, a random BvC result and
    the fixed match results AvB = 0 and CvA = 2.

    Otherwise the three skills and the BvC result are visited in order. For
    each one a candidate value is drawn with weights given by a unit-variance
    normal density centred on the current value, and it replaces the current
    value with probability ``min(1, p(candidate) / p(current))``, where ``p``
    is the marginal of that variable given AvB = 0 and CvA = 2. Positions 3
    and 5 are never changed.

    Returns:
        The new state as a tuple of length 6.
    """
    if not initial_state:
        skills = list(np.random.randint(0, 4, size=[3]))
        return tuple(skills + [0, random.randint(0, 2), 2])

    sample = list(initial_state)
    nodes = list(bayes_net.nodes())
    solver = VariableElimination(bayes_net)
    observed = {'AvB': 0, 'CvA': 2}

    for idx, current in enumerate(sample):
        if idx < 3:
            states = [0, 1, 2, 3]  # skill levels
        elif idx == 4:
            states = [0, 1, 2]  # BvC match outcomes
        else:
            continue  # positions 3 and 5 hold the observed results

        # Proposal: favour values close to the current one.
        weights = [norm.pdf(state, current, 1) for state in states]
        candidate = random.choices(states, weights=weights)[0]

        # Index the result with the same node that was queried so the lookup
        # cannot disagree with the query.
        node = nodes[idx]
        marginals = solver.query(variables=[node],
                                 evidence=dict(observed),
                                 joint=False,
                                 show_progress=False)
        posterior = marginals[node].values
        p_candidate = posterior[candidate]
        p_current = posterior[current]

        # A current value with zero probability is always abandoned; handling
        # it explicitly avoids a divide-by-zero warning.
        if p_current == 0:
            alpha = 1.0
        else:
            alpha = min(1.0, p_candidate / p_current)
        if random.uniform(0, 1) < alpha:
            sample[idx] = candidate

    return tuple(sample)
Example #30
0
    def start(self):
        """Attach the CPDs to ``self.musicModel`` and print a series of inference results.

        Builds tabular CPDs for Difficulty, Musicianship, Rating, Exam and
        Letter, adds them to ``self.musicModel``, prints the outcome of
        ``check_model()``, and then prints the results of several
        variable-elimination queries on the model.
        """

        # Root nodes: two-state priors with no parents.
        cpd_difficulty = TabularCPD(variable='Difficulty',
                                    variable_card=2,
                                    values=[[0.6], [0.4]])
        cpd_musicianship = TabularCPD(variable='Musicianship',
                                      variable_card=2,
                                      values=[[0.7], [0.3]])

        # Rating has three states and depends on Difficulty and Musicianship:
        # one row per Rating state, one column per parent combination.
        cpd_Rating = TabularCPD(variable='Rating',
                                variable_card=3,
                                values=[[0.3, 0.05, 0.9, 0.5],
                                        [0.4, 0.25, 0.08, 0.3],
                                        [0.3, 0.7, 0.02, 0.2]],
                                evidence=['Difficulty', 'Musicianship'],
                                evidence_card=[2, 2])

        # Exam has two states and depends only on Musicianship.
        cpd_Exam = TabularCPD(variable='Exam',
                              variable_card=2,
                              values=[[0.95, 0.2], [0.05, 0.8]],
                              evidence=['Musicianship'],
                              evidence_card=[2])

        # Letter has two states and depends only on Rating (three columns).
        cpd_Letter = TabularCPD(variable='Letter',
                                variable_card=2,
                                values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                                evidence=['Rating'],
                                evidence_card=[3])
        self.musicModel.add_cpds(cpd_difficulty, cpd_musicianship, cpd_Rating,
                                 cpd_Exam, cpd_Letter)
        # Print the result of the model consistency check.
        print(self.musicModel.check_model())
        infer = VariableElimination(self.musicModel)
        # NOTE(review): indexing each query result by variable name assumes
        # query() returns a mapping keyed by variable -- confirm against the
        # installed pgmpy version.
        examResult = infer.query(variables=['Exam'],
                                 evidence={'Musicianship': 1})['Exam']
        musicResult = infer.query(variables=['Musicianship'])['Musicianship']
        ratingResult = infer.query(variables=['Rating'],
                                   evidence={
                                       'Musicianship': 1,
                                       'Difficulty': 0
                                   })['Rating']
        diffResult = infer.query(variables=['Difficulty'])['Difficulty']
        letterResult = infer.query(variables=['Letter'],
                                   evidence={'Rating': 1})['Letter']

        print(examResult)
        print(musicResult)
        print(ratingResult)
        print(diffResult)
        print(letterResult)

        # NOTE: Part 2 -- flagged by the original author as not working
        # ("getting weird results"). letterResult is reassigned here.
        letterNoOtherEvidence = infer.query(variables=['Letter'])['Letter']
        letterResult = infer.query(variables=['Letter'],
                                   evidence={'Musicianship': 0})['Letter']
        print(letterNoOtherEvidence)
        print(letterResult)
        print('')