Example #1
1
def main():
    """Build a three-node network (C, P -> H) and print the posterior of H.

    Node names: H = host, P = prize, C = contestant. Both C and P are
    observed in state 0 for the query.
    """
    edges = [('C', 'H'), ('P', 'H')]
    network = BayesianModel(edges)

    # Root nodes: three states each, same weight for every state.
    contestant_cpd = TabularCPD('C', 3, [[0.33, 0.33, 0.33]])
    prize_cpd = TabularCPD('P', 3, [[0.33, 0.33, 0.33]])

    # One row per state of H, one column per (C, P) combination.
    host_table = [
        [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
        [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
        [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0],
    ]
    host_cpd = TabularCPD('H', 3, host_table,
                          evidence=['C', 'P'], evidence_card=[3, 3])

    # Attach the tables to the structure.
    network.add_cpds(contestant_cpd, prize_cpd, host_cpd)

    # Posterior of H given the observed contestant and prize states.
    engine = VariableElimination(network)
    observed = {'C': 0, 'P': 0}
    posterior = engine.query(['H'], evidence=observed)
    print(posterior['H'])
Example #2
0
    def inference(self, variables, evidence, mode="auto", log=True):
        '''
        Run variable-elimination queries on ``self.best_model`` and report them.

        mode == "auto": for every node of the model, query that node with all
        of its parents fixed to value 1. Each result is written through
        ``self.file_writer`` and passed to ``self.log``. The ``variables`` and
        ``evidence`` arguments are overwritten in this mode.

        mode == "manual": run a single query with the given ``variables`` and
        ``evidence`` and pass each result to ``self.log``.

        ``log`` is forwarded unchanged as the second argument of ``self.log``.
        '''

        inference = VariableElimination(self.best_model)
        #inference = BeliefPropagation(self.markov)
        #inference = Mplp(self.best_model)
        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        # NOTE: this line is written to the file regardless of `mode`.
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                # The caller-supplied arguments are replaced for each node.
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                # phi_query is iterated by key and indexed - presumably a
                # mapping of variable name to factor; TODO confirm.
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)

        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
            '''
Example #3
0
def MH_sampler(bayes_net, initial_state):
    """
    Perform one Metropolis-Hastings sweep and return the resulting state.

    initial_state is a list of length 6 where:
    index 0-2: represent skills of teams A,B,C (values lie in [0,3] inclusive)
    index 3-5: represent results of matches AvB, BvC, CvA (values lie in [0,2] inclusive)

    If initial_state is empty or None, a random state is returned instead:
    random skills, AvB fixed to 0, a random BvC result and CvA fixed to 2.

    Only indices 0-2 and 4 are resampled; indices 3 and 5 are left as given.
    Candidate values are drawn with weights from a unit-variance normal pdf
    centred on the current value, and acceptance uses the distribution of the
    variable given the fixed evidence AvB=0, CvA=2.

    Returns the new state as a tuple.
    """
    if not initial_state:
        sample = list(np.random.randint(0, 4, size=[
            3,
        ])) + [0, random.randint(0, 2), 2]
        return tuple(sample)

    sample = list(initial_state)
    nodes = list(bayes_net.nodes())
    solver = VariableElimination(bayes_net)
    fixed_evidence = {'AvB': 0, 'CvA': 2}

    for idx, val in enumerate(sample):
        if idx < 3:
            # Skill variable: four possible levels.
            n_states = 4
            result_key = nodes[idx]
        elif idx == 4:
            # BvC match result: three possible outcomes.
            n_states = 3
            result_key = 'BvC'
        else:
            # AvB (index 3) and CvA (index 5) are evidence and never change.
            continue

        weights = [norm.pdf(i, sample[idx], 1) for i in range(n_states)]
        new_val = random.choices(list(range(n_states)), weights=weights)[0]
        prob = solver.query(variables=[nodes[idx]],
                            evidence=fixed_evidence,
                            joint=False,
                            show_progress=False)
        prob = prob[result_key].values
        prob1 = prob[new_val]
        prob0 = prob[val]

        # A current state with zero probability always accepts the move;
        # spelled out to avoid dividing by zero.
        alpha = 1.0 if prob0 == 0 else min(1.0, prob1 / prob0)
        if random.uniform(0, 1) < alpha:
            sample[idx] = new_val

    return tuple(sample)
Example #4
0
class TimeVE:
    """Variable-elimination query on a small model fitted to random data."""

    def setup(self):
        """Fit a five-node model on 1000 random binary rows and build the engine."""
        column_names = ['A', 'B', 'C', 'D', 'E']
        samples = np.random.randint(low=0, high=2, size=(1000, 5))
        frame = pd.DataFrame(samples, columns=column_names)

        network = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        network.fit(frame)
        self.inference = VariableElimination(network)

    def time_query(self):
        """Query A and B together on the prepared engine."""
        self.inference.query(['A', 'B'])
Example #5
0
 def query(bayes_net, query_vars, evidence_vars):
     """Query `query_vars` on `bayes_net` and return the result as a DataFrame.

     A falsy `evidence_vars` (None, empty mapping) is passed on as no
     evidence. The resulting factor is converted with
     BayesNetHelper.convertFactorToDF.
     """
     engine = VariableElimination(bayes_net)
     observed = evidence_vars if evidence_vars else None
     factor = engine.query(variables=query_vars,
                           evidence=observed,
                           show_progress=False)
     return BayesNetHelper.convertFactorToDF(factor)
Example #6
0
class HailfinderVE:
    """Variable-elimination queries on the model read from 'hailfinder.bif'."""

    def setup(self):
        """Load the network from the BIF file and build the inference engine."""
        reader = BIFReader('hailfinder.bif')
        model = reader.get_bayesian_model()
        self.inference = VariableElimination(model)

    def time_hailfinder_bound(self):
        """Query the marginal of 'Boundaries'."""
        # query() expects a list of variable names; a bare string would be
        # treated as an iterable of characters (or rejected outright).
        self.inference.query(['Boundaries'])

    def time_hailfinder_Wind(self):
        """Query the marginal of 'WindFieldPln'."""
        self.inference.query(['WindFieldPln'])
Example #7
0
 def get_most_probable_weapon(self):
     """
     Return the entry of self._weapons_in_order at the position of the largest
     value in the queried marginal of 'w'. Ties resolve to the first maximum.
     """
     engine = VariableElimination(self._weapons_model)
     marginal = engine.query(variables=['w'], show_progress=False).values
     probabilities = list(marginal)
     best_index = probabilities.index(max(marginal))
     return self._weapons_in_order[best_index]
Example #8
0
 def get_most_probable_suspect(self):
     """
     Return the entry of self._suspects_in_order at the position of the largest
     value in the queried marginal of 's'. Ties resolve to the first maximum.
     """
     engine = VariableElimination(self._suspects_model)
     marginal = engine.query(variables=['s'], show_progress=False).values
     probabilities = list(marginal)
     best_index = probabilities.index(max(marginal))
     return self._suspects_in_order[best_index]
Example #9
0
    def print_beliefs(self):
        """Print the current marginal and the label order for suspects, weapons and rooms.

        For each category, in that order, the queried distribution is printed
        first and the matching ``*_in_order`` list second.
        """
        categories = (
            ('_suspects_model', 's', '_suspects_in_order'),
            ('_weapons_model', 'w', '_weapons_in_order'),
            ('_rooms_model', 'r', '_rooms_in_order'),
        )
        for model_attr, variable, labels_attr in categories:
            engine = VariableElimination(getattr(self, model_attr))
            belief = engine.query(variables=[variable], show_progress=False)
            print(belief)
            print(getattr(self, labels_attr))
Example #10
0
def network_inference(network_model, data):
    """
    Predict the state distribution of every model variable missing from `data`.

    input: a pandas Series (one observation) whose index holds variable names
    output: the query result for the missing variables, as returned by
            VariableElimination.query

    For every missing variable whose last state has probability above 0.5,
    a line is printed.

    Raises ValueError when `data` already covers every model variable or
    contains a variable that is not in the model.
    """
    # Validate the input before running inference.
    model_nodes = set(network_model.nodes())
    observed = set(data.index)
    if observed == model_nodes:
        raise ValueError("No variable missing in data. Nothing to predict")
    elif observed - model_nodes:
        raise ValueError("Data has variables which are not in the model")

    missing_variables = model_nodes - observed
    # Exact inference by variable elimination.
    model_inference = VariableElimination(network_model)
    states_dict = model_inference.query(variables=list(missing_variables),
                                        evidence=data.to_dict())

    # Report each variable whose last state is the likely one. The return
    # used to sit in the loop's else branch, so the function either returned
    # before all variables were checked or fell through and returned None.
    for k, v in states_dict.items():
        last_state_prob = v.values[-1]
        if last_state_prob > 0.5:
            print(k, 'probabilily occured: %.3f' % last_state_prob)
    return states_dict
Example #11
0
def testCalifornia(true_ny_data, true_ca_data, predicted_ca_data,
                   predicted_ny_data):
    """Print the distribution of 'True CA' for every sample.

    For each index the true NY label, the predicted CA label and the
    predicted NY label are used as evidence on the module-level
    ``cities_model``; the labels and the resulting 'True CA' factor are
    printed. The four inputs are indexed in parallel.

    The print calls use the single-argument function form so the code runs
    on Python 3 and prints the same text on Python 2.
    """
    ca_infer = VariableElimination(cities_model)

    # NOTE: never accumulated (the averaging code is disabled), so the final
    # print always shows 0.0.
    avg_prob = 0.0

    for index, true_ny_label in enumerate(true_ny_data):
        print("True NY label " + str(true_ny_label))
        true_ca_label = true_ca_data[index]
        print("True CA label " + str(true_ca_label))

        predicted_ca_label = predicted_ca_data[index]
        print("Predicted CA label " + str(predicted_ca_label))

        predicted_ny_label = predicted_ny_data[index]
        prob_ca_protest = ca_infer.query(variables=['True CA'],
                                         evidence={
                                             'True NY': true_ny_label,
                                             'Predicted CA':
                                             predicted_ca_label,
                                             'Predicted NY': predicted_ny_label
                                         })
        factor = prob_ca_protest['True CA']
        print(factor)

    print(avg_prob)
Example #12
0
def Inference(model, variables, evidence, treshold):
    """Query the most likely state of each variable given the evidence.

    model:     network passed to VariableElimination
    variables: list of variables to query, e.g. [var3, var4]
    evidence:  observed states, e.g. {var1: label, var2: label}
    treshold:  minimum probability the best state must exceed

    Returns (ok, result). ``result`` maps each variable to
    {'value': state index, 'prob': probability} when the maximum is unique
    and greater than ``treshold``; otherwise both entries are NaN. If
    anything raises, the traceback is printed and (False, partial result)
    is returned.
    """
    result = {}
    try:
        infer = VariableElimination(model)
        inferresult = infer.query(variables=variables, evidence=evidence)
        for var in variables:
            tmpvalue = inferresult[var].values.astype('float32')
            print(tmpvalue)
            result[var] = {}
            best = max(tmpvalue)
            unique_best = len(tmpvalue[tmpvalue == best]) == 1
            if unique_best and best > treshold:
                result[var] = {'value': np.argmax(tmpvalue), 'prob': best}
            else:
                # np.nan: the np.NaN alias no longer exists in NumPy 2.0.
                result[var] = {'value': np.nan, 'prob': np.nan}
    except Exception:
        # Deliberate best-effort: report the failure and hand back whatever
        # was collected. KeyboardInterrupt/SystemExit are not swallowed.
        print(traceback.format_exc())
        return False, result
    return True, result
Example #13
0
def mutual_information(self, X, Y, evidence):
    """Return the mutual information of X and Y given the evidence, in nats.

    The joint distribution of X and Y is obtained by variable elimination on
    ``self.model_pgmpy``; both marginals are derived from that joint.

    Returns -1 when X or Y is already part of ``evidence``, since the
    quantity is not meaningful in that case.
    """
    if X in evidence or Y in evidence:
        return -1

    from math import log

    from pgmpy.inference import VariableElimination

    model_infer = VariableElimination(self.model_pgmpy)

    # Joint distribution over the two variables.
    joint = model_infer.query(variables=[X, Y], evidence=evidence, joint=True)

    # Marginals derived from the joint.
    Y_mar = joint.marginalize([X], inplace=False).values
    X_mar = joint.marginalize([Y], inplace=False).values

    # The variable order of the joint table is not fixed; put Y on the rows.
    if joint.variables[0] != Y:
        XY_joint = np.transpose(joint.values)
    else:
        XY_joint = joint.values

    mutual_info = 0
    for i, p_y in enumerate(Y_mar):
        for j, p_x in enumerate(X_mar):
            p_xy = XY_joint[i, j]
            # 0 * log(0) is taken as 0. Skipping up front also covers cells
            # whose marginal is zero, which would otherwise give 0/0 = NaN.
            if p_xy <= 0:
                continue
            mutual_info += p_xy * log(p_xy / (p_y * p_x))

    return mutual_info
Example #14
0
    def hypothesis_update(self, node, prediction_error, prediction):
        """
        Revise the hypotheses of the generative model in response to a prediction error.

        If the node name contains "motor", the error is handed to
        ``self.sensory_input.action``. Otherwise every root node of the model is
        queried with ``node`` observed at argmax(prediction_error + prediction),
        and the root's CPD values are replaced by the queried values.

        :param node: name of the node causing the prediction error
        :param prediction_error: the prediction error itself
        :param prediction: prediction causing the prediction error

        :type node : str
        :type prediction_error: np.array
        :type prediction: np.array
        """
        # In theory a hypothesis update covers both perceptual and motor updates;
        # this implementation keeps the two explicitly separate.
        # TODO: Need to have custom implementation of bayesian network, so that prediction errors in proprioceptive
        # TODO: nodes (motor) are resolved by executing the motor action, and not performing hypo update
        engine = VariableElimination(self.model)

        if "motor" in node:
            self.sensory_input.action(node, prediction_error, prediction)
            return

        for root in self.model.get_roots():
            observed = {node: np.argmax(prediction_error + prediction)}
            posterior = engine.query(variables=[root], evidence=observed)
            previous = self.model.get_cpds(root).values
            self.model.get_cpds(root).values = posterior.get(root).values
            logging.debug("node[%s] hypothesis-update from %s to %s", root,
                          previous,
                          posterior.get(root).values)
Example #15
0
    def inf(self, file1):
        """
        Build a Bayesian network from the text file ``file1`` and print the
        queried distribution of every node that is not in the evidence.

        Layout as consumed here: consecutive five-line node records (node
        name, number of values, CPD string, parent list, parent
        cardinalities), ended by a line containing only a newline. The line
        after that separator holds the edges and the line two further down
        holds the evidence. Parsing is delegated to the getnode / getList /
        getCard / parseCpd / getegdes / getValue helpers.

        Nothing is printed when the model check reports failure.
        """
        # Read everything up front so the handle is closed even if parsing fails.
        with open(file1, encoding="utf8") as f1:
            lines = f1.readlines()

        G = BayesianModel()
        nodeList = {}
        i = 0
        while i < len(lines):
            if lines[i] == '\n':
                break
            nodeName = self.getnode(lines[i])
            valueNum = int(lines[i + 1])
            cpd_str = lines[i + 2]
            sequence = self.getList(lines[i + 3])
            card = self.getCard(lines[i + 4])
            cpd = self.parseCpd(cpd_str, valueNum, card)
            nodeList[nodeName] = {
                'nodeName': nodeName,
                'valueNum': valueNum,
                'cpd': cpd,
                'sequence': sequence,
                'card': card,
            }
            i += 5
        # `i` now points at the separator line.
        edges = self.getegdes(lines[i + 1])
        evidence2 = self.getValue(lines[i + 3])

        # `edges` is a flat list: consecutive pairs are (source, target).
        for k in range(len(edges) // 2):
            G.add_edge(edges[2 * k], edges[2 * k + 1])

        for node in nodeList.values():
            if node['sequence'][0] == '':
                # No parents listed: unconditional table.
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 values=node['cpd'])
            else:
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 evidence=node['sequence'],
                                 evidence_card=node['card'],
                                 values=node['cpd'])
            G.add_cpds(cpt)

        if G.check_model():
            inference = VariableElimination(G)
            report = []

            for node in G.nodes():
                if node not in evidence2:
                    label = node + ' '
                    phi_query = inference.query(variables=[node],
                                                evidence=evidence2,
                                                show_progress=False).values
                    report.append(label + str(phi_query) + '\n')
            print(''.join(report))
Example #16
0
    def __init__(self, model, actions, py_func):
        """Set up the observational model and one post-action model per action.

        model is a pgmpy.BayesianModel
        actions is a list of (var,value) tuples
        py_func is stored unchanged as ``self.py_func``

        For each action a model is derived with ``GeneralModel.do``, a sampler
        is created for it, and the distribution over the parents of 'Y' is
        queried and stored. ``self.pre_compute()`` runs last.
        """
        self.py_func = py_func
        self.parents = sorted(model.get_parents('Y'))
        self.N = len(self.parents)
        self.actions = actions
        self.K = len(actions)

        self.observational_model = model
        self.observational_inference = VariableElimination(model)

        self.post_action_models = [
            GeneralModel.do(model, action) for action in actions
        ]
        self.samplers = [
            BayesianModelSampling(intervened)
            for intervened in self.post_action_models
        ]

        self.interventional_distributions = []
        for intervened in self.post_action_models:
            engine = VariableElimination(intervened)
            # query() is unpacked as a pair here; only the second item is kept.
            _, parent_distribution = engine.query(self.parents)
            self.interventional_distributions.append(parent_distribution)

        self.pre_compute()
Example #17
0
    def Test_Data_Inference_n_steps(self, df_test, n_tsteps):
        """Query 'M_t' (and 'M_t+1' .. 'M_t+{n_tsteps-1}') for every row of df_test.

        Every column that is not a query variable is used as evidence. For
        each query variable three columns are written into a copy of df_test
        filtered to COLUMN_SEQUENCE: '<var>_0', '<var>_1' (the first two
        values of the queried distribution) and '<var>' (Map_Occ_Values
        applied to the second value). Rows rejected by check_data_in_evidence
        get NaN in all three. The duration of each query is appended to
        ``self.debug_timmer``.

        Returns the result DataFrame.
        """
        df_inference_results = df_test.filter(items=COLUMN_SEQUENCE).copy()
        infer = VariableElimination(self.model)

        dict_unique_vals = {
            column: df_test[column].unique() for column in df_test.columns
        }

        result_list = ['M_t']
        if n_tsteps > 1:
            result_list = result_list + [
                "M_t+{}".format(step) for step in range(1, n_tsteps)
            ]

        self.debug_timmer = []
        evidence_columns = [
            column for column in df_test.columns if column not in result_list
        ]
        evidence_rows = df_test.filter(items=evidence_columns).to_dict('index')

        count = 0
        for index_key, value in evidence_rows.items():
            usable = check_data_in_evidence(value, dict_unique_vals)
            for query_var in result_list:
                column_0 = '{}_0'.format(query_var)
                column_1 = '{}_1'.format(query_var)
                column_state = '{}'.format(query_var)
                if usable:
                    tic = time.time()
                    result = infer.query(variables=[query_var], evidence=value)
                    toc = time.time() - tic
                    self.debug_timmer.append(toc)
                    probabilities = result[query_var].values
                    df_inference_results.at[index_key, column_0] = probabilities[0]
                    df_inference_results.at[index_key, column_1] = probabilities[1]
                    df_inference_results.at[index_key, column_state] = Map_Occ_Values(probabilities[1])
                else:
                    df_inference_results.at[index_key, column_0] = np.nan
                    df_inference_results.at[index_key, column_1] = np.nan
                    df_inference_results.at[index_key, column_state] = np.nan

            count += 1

        logging.info("thermostat {} - Iterations of test {}".format(self.thermostat.tstat_id, count))
        return df_inference_results
    def exact_inference(self, filename):
        """Answer the query on the first line of ``filename`` by variable elimination.

        The first line is parsed by ``self.__extract_query`` into the query
        variables and the evidence variables (both indexed by variable name).
        Evidence states are translated through ``self.mapper`` before the
        query on ``self.networks``.

        Returns the entry of the resulting table selected by the requested
        state of every query variable.
        """
        # Only the first line is needed; the context manager closes the file
        # even if parsing or the query raises later.
        with open(filename, 'r') as f:
            query_variables, evidence_variables = self.__extract_query(
                f.readline())

        eliminate = VariableElimination(self.networks)

        evidence_variables_mapped = {
            variable: self.mapper[variable][evidence_variables[variable]]
            for variable in evidence_variables
        }

        query_variables_feature = list(query_variables.keys())

        result = eliminate.query(variables=query_variables_feature,
                                 evidence=evidence_variables_mapped)

        # Index one axis per variable, in the order the result lists them.
        value = result.values
        for feature in result.variables:
            value = value[result.get_state_no(
                feature, self.mapper[feature][query_variables[feature]])]

        return value
Example #19
0
def inference(model, variables=None, evidence=None, verbose=3):
    '''
    Run a variable-elimination query on ``model['model']``, print and return it.

    Inference means asking conditional-probability questions of the model,
    e.g. the probability that the sprinkler is on given that it is raining,
    which is equivalent to asking $ P(g^1 | i^1) $.

    Inference algorithms fall into two categories:
        1. Exact inference: returns the exact probability values of the query.
        2. Approximate inference: trades accuracy for less computation.

    Two common exact-inference algorithms:
        1. Clique Tree Belief Propagation
        2. Variable Elimination

    Variable elimination gives the same answer as marginalising the joint
    distribution, but avoids building the full joint. To eliminate $ X $,
    only the factors that involve $ X $ are multiplied and summed over, so
    the work is done on much smaller factors.

    Parameters
        model     : mapping whose 'model' entry is handed to VariableElimination
        variables : variables to query
        evidence  : observed states, passed on unchanged
        verbose   : accepted but not used in this function

    Returns the object produced by ``VariableElimination.query``.
    '''

    engine = VariableElimination(model['model'])
    answer = engine.query(variables=variables, evidence=evidence)
    print(answer)
    return answer
Example #20
0
def main():
    """Define the C/P -> H network, attach its CPDs and print P(H | C=0, P=0).

    H: host, P: prize, C: contestant.
    """
    model = BayesianModel([('C', 'H'), ('P', 'H')])

    # Unconditional tables for the two parent nodes.
    cpd_c = TabularCPD(variable='C', variable_card=3,
                       values=[[0.33, 0.33, 0.33]])
    cpd_p = TabularCPD(variable='P', variable_card=3,
                       values=[[0.33, 0.33, 0.33]])

    # Conditional table of H: rows are states of H, columns run over (C, P).
    h_given_c_p = [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                   [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                   [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]]
    cpd_h = TabularCPD(variable='H',
                       variable_card=3,
                       values=h_given_c_p,
                       evidence=['C', 'P'],
                       evidence_card=[3, 3])

    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # model.check_model() can be used to validate structure and CPDs: it
    # returns True when everything is consistent and raises otherwise.

    solver = VariableElimination(model)
    result = solver.query(['H'], evidence={'C': 0, 'P': 0})
    print(result['H'])
Example #21
0
def get_gauge_prob(bayes_net):
    """Return the marginal probability of the gauge showing hot
    in the power plant system (state index 1 of "gauge")."""
    engine = VariableElimination(bayes_net)
    marginals = engine.query(variables=["gauge"], joint=False)
    gauge_factor = marginals["gauge"]
    return gauge_factor.values[1]
 def category_probability(self, category: str) -> float:
     '''Return the probability of the given category.

     The marginal of Data.CATEGORY_NAME is queried on self.model. The
     category is translated to a state name via self.categories; a category
     missing from that mapping falls back to state name 0.
     '''
     elimination = VariableElimination(self.model)
     probability = elimination.query(variables=[Data.CATEGORY_NAME])
     state_name = self.categories.get(category, 0)
     state = probability.get_state_no(Data.CATEGORY_NAME, state_name)
     return probability.values[state]
Example #23
0
def get_alarm_prob(bayes_net):
    """Return the marginal probability of the alarm ringing
    in the power plant system (state index 1 of "alarm")."""
    engine = VariableElimination(bayes_net)
    marginals = engine.query(variables=["alarm"], joint=False)
    alarm_factor = marginals["alarm"]
    return alarm_factor.values[1]
Example #24
0
    def make_accusation(self):
        """Return a (suspect, weapon, room) accusation, or None when not confident.

        A stored ``self._accusation`` is returned as is. Otherwise the three
        marginals are queried, and only if the entropy of each is at or below
        ``self._accusation_entropy_threshold`` is the most probable entry of
        each category returned.
        """
        if self._accusation:
            return self._accusation

        def query_marginal(model, variable):
            return VariableElimination(model).query(variables=[variable],
                                                    show_progress=False)

        def likeliest(labels, factor):
            # First position holding the maximum value.
            values = list(factor.values)
            return labels[values.index(max(values))]

        suspects = query_marginal(self._suspects_model, 's')
        weapons = query_marginal(self._weapons_model, 'w')
        rooms = query_marginal(self._rooms_model, 'r')

        confident = all(
            entropy(factor.values) <= self._accusation_entropy_threshold
            for factor in (suspects, weapons, rooms))
        if not confident:
            return None

        return (likeliest(self._suspects_in_order, suspects),
                likeliest(self._weapons_in_order, weapons),
                likeliest(self._rooms_in_order, rooms))
Example #25
0
def graph_inference(graph, targets, evidence):
    """Query `targets` on `graph` given `evidence`; print and return the result."""
    engine = VariableElimination(graph)
    answer = engine.query(variables=targets, evidence=evidence)
    print(answer)
    return answer
def calculate_posterior(bayes_net):
    """Return the posterior distribution of the BvC match given AvB = 0 and CvA = 2.

    The result is the ``values`` attribute of the queried 'BvC' factor
    (win, loss and tie likelihood per the original description)."""
    observed = {'AvB': 0, 'CvA': 2}
    engine = VariableElimination(bayes_net)
    conditional = engine.query(variables=['BvC'], evidence=observed)
    return conditional['BvC'].values
Example #27
0
def evaluate(symptom_init, successors, user_sub_answers, user=None):
    """Rank the conditions reachable from an initial symptom by probability.

    The initial symptom counts as answered 'yes' (1); each entry of
    `user_sub_answers` gives the answer for the sub-symptom at the same
    position in `successors`. Every relevant condition is queried with that
    evidence and the value at state index 1 is kept.

    Returns a list of [condition name, value] pairs sorted by value,
    highest first. When `user` is given, the pairs are first passed through
    apply_personal_features(user, pairs, 4) and its output is returned with
    names resolved.
    """
    symptom_init = get_id_from_name(symptom_init, df_related_symptoms)
    graph_entry = graph_dict[symptom_init]
    symptom_graph = graph_entry[0]
    condition_list = graph_entry[2]
    network_infer = VariableElimination(symptom_graph)

    # Evidence: initial symptom is 'yes', sub-symptoms follow the answers.
    evidence = {symptom_init: 1}
    for position, answer in enumerate(user_sub_answers):
        sub_sympt_id = get_id_from_name(successors[position],
                                        df_sub_symptom_names)
        evidence[sub_sympt_id] = 1 if answer else 0

    # Conditions reachable via the initial symptom.
    relev_conds = select_relevant_cond(symptom_init, condition_list)

    scored = []
    for condition in relev_conds:
        cond_prob = network_infer.query(variables=[condition],
                                        evidence=evidence)
        scored.append([condition, cond_prob[condition].values[1]])
    scored.sort(key=lambda pair: pair[1], reverse=True)

    named = [[get_name_from_id(cond_id, df_cond), value]
             for cond_id, value in scored]

    if user is None:
        return named

    personalised = apply_personal_features(user, scored, 4)
    return [[get_name_from_id(entry[0], df_cond), entry[1]]
            for entry in personalised]
Example #28
0
 def predict(self, node):
     """
     Predict the given node using the root nodes as evidence.

     The evidence is whatever self.get_root_nodes() returns, minus the
     queried node itself.

     :return: prediction for given observation variable, a prediction is a probability distribution
     :rtype: np.array
     """
     engine = VariableElimination(self.pgmpy_test)
     observed = {
         name: state
         for name, state in self.get_root_nodes().items()
         if name != node
     }
     answer = engine.query(variables=[node], evidence=observed)
     return answer[node].values
Example #29
0
 def _expected_Y(self):
     """Return an array of length self.K with the value of Y=1 under each post-action model."""
     expected_Y = np.zeros(self.K)
     for slot, intervened in enumerate(self.post_action_models):
         engine = VariableElimination(intervened)
         # query() is unpacked as a pair; only the second item is used.
         _, reward_distribution = engine.query(['Y'])
         # TODO investigate failing if inplace=True - bug in pgmpy?
         reduced = reward_distribution.reduce([('Y', 1)], inplace=False)
         expected_Y[slot] = reduced.values
     return expected_Y
def get_gauge_prob(bayes_net):
    """Return the marginal probability of the gauge showing hot
    in the power plant system (state index 1 of 'gauge')."""
    engine = VariableElimination(bayes_net)
    marginals = engine.query(variables=['gauge'])
    return marginals['gauge'].values[1]
def get_alarm_prob(bayes_net):
    """Return the marginal probability of the alarm ringing
    in the power plant system (state index 1 of 'alarm')."""
    engine = VariableElimination(bayes_net)
    marginals = engine.query(variables=['alarm'])
    return marginals['alarm'].values[1]
Example #32
0
    def start(self):
        """Attach the CPDs to self.musicModel, validate it and print a series of queries.

        Printed in order: the check_model() result; Exam given Musicianship=1;
        Musicianship; Rating given Musicianship=1 and Difficulty=0; Difficulty;
        Letter given Rating=1; then Letter without evidence, Letter given
        Musicianship=0 and an empty line.
        """
        difficulty_cpd = TabularCPD(variable='Difficulty',
                                    variable_card=2,
                                    values=[[0.6], [0.4]])
        musicianship_cpd = TabularCPD(variable='Musicianship',
                                      variable_card=2,
                                      values=[[0.7], [0.3]])
        rating_cpd = TabularCPD(variable='Rating',
                                variable_card=3,
                                values=[[0.3, 0.05, 0.9, 0.5],
                                        [0.4, 0.25, 0.08, 0.3],
                                        [0.3, 0.7, 0.02, 0.2]],
                                evidence=['Difficulty', 'Musicianship'],
                                evidence_card=[2, 2])
        exam_cpd = TabularCPD(variable='Exam',
                              variable_card=2,
                              values=[[0.95, 0.2], [0.05, 0.8]],
                              evidence=['Musicianship'],
                              evidence_card=[2])
        letter_cpd = TabularCPD(variable='Letter',
                                variable_card=2,
                                values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                                evidence=['Rating'],
                                evidence_card=[3])

        self.musicModel.add_cpds(difficulty_cpd, musicianship_cpd, rating_cpd,
                                 exam_cpd, letter_cpd)
        print(self.musicModel.check_model())
        engine = VariableElimination(self.musicModel)

        def distribution(variable, **observed):
            # Evidence is only passed along when some was given.
            if observed:
                answer = engine.query(variables=[variable], evidence=observed)
            else:
                answer = engine.query(variables=[variable])
            return answer[variable]

        # All queries of the first part run before anything is printed.
        first_part = [
            distribution('Exam', Musicianship=1),
            distribution('Musicianship'),
            distribution('Rating', Musicianship=1, Difficulty=0),
            distribution('Difficulty'),
            distribution('Letter', Rating=1),
        ]
        for table in first_part:
            print(table)

        # Part 2 - the original author flagged these results as looking wrong.
        second_part = [
            distribution('Letter'),
            distribution('Letter', Musicianship=0),
        ]
        for table in second_part:
            print(table)
        print('')
Example #33
0
class BayesNetwork:
    """Bayesian network whose nodes are the columns of a dataset.

    The structure is supplied as pairs of column *indices*; the indices are
    translated to column names when the graph is built.
    """

    def __init__(self, dataset, graph_structure_index):
        """Remember the dataset and the index-based edge list.

        Parameters
        ----------
        dataset : object
            Must expose a ``dataframe`` attribute that has ``columns``.
        graph_structure_index : iterable
            Edges given as pairs of positions into ``dataset.dataframe.columns``.
        """
        self.dataset = dataset
        self.columns = dataset.dataframe.columns
        self.graph_structure_index = graph_structure_index

    def build_graph(self):
        """Create ``self.model`` from the edge list, using column names as nodes."""
        # Only the first two items of every edge are used, exactly as before.
        named_edges = [(self.columns[edge[0]], self.columns[edge[1]])
                       for edge in self.graph_structure_index]
        self.model = BayesianModel(named_edges)

    def draw_graph(self):
        """Hand the model over to ``Drawer.draw_graph``."""
        Drawer.draw_graph(self.model)

    def fit_model(self, prior=False, prior_data=[]):
        """Estimate the CPDs of ``self.model`` from the dataset.

        Parameters
        ----------
        prior : bool
            When true, fitting with prior pseudo counts is requested; this is
            not implemented yet.
        prior_data : list
            Currently unused (and never mutated).

        Raises
        ------
        NotImplementedError
            If ``prior`` is true.
        """
        if prior:
            # Placeholder pseudo counts for a future prior-based estimator.
            # This used to be written with doubled braces, which builds a set
            # containing a dict and fails with TypeError before the intended
            # NotImplementedError could be raised.
            pseudo_counts = {
                'D': [300, 700],
                'I': [500, 500],
                'G': [800, 200],
                'L': [500, 500],
                'S': [400, 600]
            }
            raise NotImplementedError
        else:
            # All rows except the last three are used for fitting.
            self.model.fit(self.dataset.dataframe[0:-3],
                           estimator=MaximumLikelihoodEstimator)

    def inference(self, name):
        """Print the marginal distribution of the variable ``name``.

        A fresh ``VariableElimination`` engine is created on every call and
        kept in ``self.infer``.
        """
        from pgmpy.inference import VariableElimination
        self.infer = VariableElimination(self.model)
        q = self.infer.query(variables=[name])
        print(q[name])

    def evaluate_result(self):
        """Print every CPD of the model and pass two-dimensional ones to ``Drawer``."""
        for cpd in self.model.get_cpds():
            print("CPD of {variable}:".format(variable=cpd.variable))
            print(cpd)
            accept_node = cpd.variables[0]

            n_axes = len(cpd.values.shape)
            # Tables with more than three axes used to get a 3D drawing; that
            # path is disabled, so only the two-axis case produces output.
            if n_axes == 2:
                title = cpd.variables[1] + '----->' + accept_node
                Drawer(title=title,
                       is_show=False,
                       is_save=False,
                       save_path='img/' + title + '.jpg').draw_matrix(
                           cpd.values)
class TestVariableEliminationMarkov(unittest.TestCase):
    """Variable elimination on a Markov network.

    The network is a moralised version of a Bayesian network over
    A, R, J, Q, L and G, so the expected numbers match the Bayesian case;
    only the factors matter for inference.

    All reference values were obtained with SAMIAM (assuming that it is
    correct).
    """

    def setUp(self):
        edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'),
                 ('G', 'L'), ('A', 'R'), ('J', 'G')]
        self.markov_model = MarkovModel(edges)

        # Each factor is derived from the CPD of the corresponding node.
        phi_a = TabularCPD('A', 2, values=[[0.2], [0.8]]).to_factor()
        phi_r = TabularCPD('R', 2, values=[[0.4], [0.6]]).to_factor()
        phi_j = TabularCPD(
            'J', 2,
            values=[[0.9, 0.6, 0.7, 0.1],
                    [0.1, 0.4, 0.3, 0.9]],
            evidence=['A', 'R'], evidence_card=[2, 2]).to_factor()
        phi_q = TabularCPD(
            'Q', 2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=['J'], evidence_card=[2]).to_factor()
        phi_l = TabularCPD(
            'L', 2,
            values=[[0.9, 0.45, 0.8, 0.1],
                    [0.1, 0.55, 0.2, 0.9]],
            evidence=['J', 'G'], evidence_card=[2, 2]).to_factor()
        phi_g = TabularCPD('G', 2, [[0.6], [0.4]]).to_factor()

        self.markov_model.add_factors(phi_a, phi_r, phi_j, phi_q, phi_l, phi_g)
        self.markov_inference = VariableElimination(self.markov_model)

    def _assert_marginal(self, result, variable, expected):
        """Compare ``result[variable].values`` with ``expected`` (almost-equal)."""
        np_test.assert_array_almost_equal(result[variable].values,
                                          np.array(expected))

    def test_query_single_variable(self):
        marginals = self.markov_inference.query(['J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])

    def test_query_multiple_variable(self):
        marginals = self.markov_inference.query(['Q', 'J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])
        self._assert_marginal(marginals, 'Q', [0.4912, 0.5088])

    def test_query_single_variable_with_evidence(self):
        observed = {'A': 0, 'R': 1}
        marginals = self.markov_inference.query(variables=['J'],
                                                evidence=observed)
        self._assert_marginal(marginals, 'J', [0.60, 0.40])

    def test_query_multiple_variable_with_evidence(self):
        observed = {'A': 0, 'R': 0, 'G': 0, 'L': 1}
        marginals = self.markov_inference.query(variables=['J', 'Q'],
                                                evidence=observed)
        self._assert_marginal(marginals, 'J', [0.818182, 0.181818])
        self._assert_marginal(marginals, 'Q', [0.772727, 0.227273])

    def test_query_multiple_times(self):
        # Every query is issued twice: repeating it must not modify the model.
        engine = self.markov_inference

        for _ in range(2):
            marginals = engine.query(['J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])

        for _ in range(2):
            marginals = engine.query(['Q', 'J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])
        self._assert_marginal(marginals, 'Q', [0.4912, 0.5088])

        for _ in range(2):
            # A fresh evidence dict is handed over on each call.
            marginals = engine.query(variables=['J'],
                                     evidence={'A': 0, 'R': 1})
        self._assert_marginal(marginals, 'J', [0.60, 0.40])

        for _ in range(2):
            marginals = engine.query(variables=['J', 'Q'],
                                     evidence={'A': 0, 'R': 0,
                                               'G': 0, 'L': 1})
        self._assert_marginal(marginals, 'J', [0.818182, 0.181818])
        self._assert_marginal(marginals, 'Q', [0.772727, 0.227273])

    def test_max_marginal(self):
        value = self.markov_inference.max_marginal()
        np_test.assert_almost_equal(value, 0.1659, decimal=4)

    def test_max_marginal_var(self):
        value = self.markov_inference.max_marginal(['G'])
        np_test.assert_almost_equal(value, 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        value = self.markov_inference.max_marginal(['G', 'R'])
        np_test.assert_almost_equal(value, 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        value = self.markov_inference.max_marginal(['G', 'R', 'A'])
        np_test.assert_almost_equal(value, 0.3260, decimal=4)

    def test_map_query(self):
        assignment = self.markov_inference.map_query()
        expected = {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}
        self.assertDictEqual(assignment, expected)

    def test_map_query_with_evidence(self):
        assignment = self.markov_inference.map_query(
            ['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(assignment, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        order = ['G', 'Q', 'A', 'J', 'L', 'R']
        graph = self.markov_inference.induced_graph(order)
        # Normalise edge direction and ordering before comparing.
        result_edges = sorted(sorted(edge) for edge in graph.edges())
        expected_edges = [['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']]
        self.assertEqual(expected_edges, result_edges)

    def test_induced_width(self):
        order = ['G', 'Q', 'A', 'J', 'L', 'R']
        self.assertEqual(2, self.markov_inference.induced_width(order))

    def tearDown(self):
        del self.markov_inference
        del self.markov_model
import numpy as np
import pandas as pd
from pgmpy.inference import VariableElimination
from pgmpy.models import BayesianModel

# Learn P(rating | occupation) from the unified MovieLens-style CSV and print
# the marginal distribution of `rating`.
data = pd.read_csv('~/Documents/unifiedMLData.csv')

# Only the occupation -> rating edge is active. Further candidate edges that
# were tried and are currently switched off:
#   ('gender', 'rating'), ('age', 'rating'), ('age', 'occupation'),
#   ('gender', 'occupation'), ('genre', 'movie_title'),
#   ('movie_title', 'rating')
movie_model = BayesianModel([
    ('occupation', 'rating'),
])
# With no estimator given, fit() uses the library's default estimator.
movie_model.fit(data)

model_infer = VariableElimination(movie_model)
# `variables` has to be a list of names: a bare string is treated as a
# sequence of characters (or rejected outright), so 'rating' itself would
# never be queried.
results = model_infer.query(variables=['rating'])

print(results['rating'])

# To inspect the learned table instead: print(movie_model.get_cpds('rating'))
Example #36
0
# CPD of F given I (I has three states).
# `model`, `income_cpd` and `lifeExp_cpd` are defined earlier in this script.
femaleSchool_cpd = TabularCPD(
    variable='F', variable_card=2,
    values=[[.8, .3, .2],
            [.2, .7, .8]],
    evidence=['I'], evidence_card=[3])

# CPD of G given F and L (both binary).
govtCorr_cpd = TabularCPD(
    variable='G', variable_card=2,
    values=[[.05, .4, .55, .85],
            [.95, .6, .45, .15]],
    evidence=['F', 'L'], evidence_card=[2, 2])

model.add_cpds(income_cpd, lifeExp_cpd,
               femaleSchool_cpd, govtCorr_cpd)

inference = VariableElimination(model)

# `variables` is passed as a list: a bare string only worked by accident
# because the name is a single character.
prob_G = inference.query(variables=['G'], evidence={'L': 1})
print(prob_G['G'])
# +-----+----------+
# | G   |   phi(G) |
# |-----+----------|
# | G_0 |   0.7292 |
# | G_1 |   0.2708 |
# +-----+----------+

# The same engine is reused for the second query.
prob_G = inference.query(variables=['G'], evidence={'F': 1})
print(prob_G['G'])
# +-----+----------+
# | G   |   phi(G) |
# |-----+----------|
# | G_0 |   0.7174 |
# | G_1 |   0.2826 |
# +-----+----------+
Example #37
0
class TestVariableElimination(unittest.TestCase):
    """Variable elimination on a small Bayesian network.

    Structure: A -> J <- R, J -> Q, J -> L <- G.

    All reference values were obtained with SAMIAM (assuming that it is
    correct).
    """

    def setUp(self):
        edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'),
                 ('J', 'L'), ('G', 'L')]
        self.bayesian_model = BayesianModel(edges)

        prior_a = TabularCPD('A', 2, [[0.2], [0.8]])
        prior_r = TabularCPD('R', 2, [[0.4], [0.6]])
        table_j = TabularCPD('J', 2,
                             [[0.9, 0.6, 0.7, 0.1],
                              [0.1, 0.4, 0.3, 0.9]],
                             evidence=['R', 'A'], evidence_card=[2, 2])
        table_q = TabularCPD('Q', 2,
                             [[0.9, 0.2],
                              [0.1, 0.8]],
                             evidence=['J'], evidence_card=[2])
        table_l = TabularCPD('L', 2,
                             [[0.9, 0.45, 0.8, 0.1],
                              [0.1, 0.55, 0.2, 0.9]],
                             evidence=['G', 'J'], evidence_card=[2, 2])
        prior_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(prior_a, prior_g, table_j,
                                     table_l, table_q, prior_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    def _assert_marginal(self, result, variable, expected):
        """Compare ``result[variable].values`` with ``expected`` (almost-equal)."""
        np_test.assert_array_almost_equal(result[variable].values,
                                          np.array(expected))

    def test_query_single_variable(self):
        marginals = self.bayesian_inference.query(['J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])

    def test_query_multiple_variable(self):
        marginals = self.bayesian_inference.query(['Q', 'J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])
        self._assert_marginal(marginals, 'Q', [0.4912, 0.5088])

    def test_query_single_variable_with_evidence(self):
        observed = {'A': 0, 'R': 1}
        marginals = self.bayesian_inference.query(variables=['J'],
                                                  evidence=observed)
        self._assert_marginal(marginals, 'J', [0.60, 0.40])

    def test_query_multiple_variable_with_evidence(self):
        observed = {'A': 0, 'R': 0, 'G': 0, 'L': 1}
        marginals = self.bayesian_inference.query(variables=['J', 'Q'],
                                                  evidence=observed)
        self._assert_marginal(marginals, 'J', [0.818182, 0.181818])
        self._assert_marginal(marginals, 'Q', [0.772727, 0.227273])

    def test_query_multiple_times(self):
        # Every query is issued twice: repeating it must not modify the model.
        engine = self.bayesian_inference

        for _ in range(2):
            marginals = engine.query(['J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])

        for _ in range(2):
            marginals = engine.query(['Q', 'J'])
        self._assert_marginal(marginals, 'J', [0.416, 0.584])
        self._assert_marginal(marginals, 'Q', [0.4912, 0.5088])

        for _ in range(2):
            # A fresh evidence dict is handed over on each call.
            marginals = engine.query(variables=['J'],
                                     evidence={'A': 0, 'R': 1})
        self._assert_marginal(marginals, 'J', [0.60, 0.40])

        for _ in range(2):
            marginals = engine.query(variables=['J', 'Q'],
                                     evidence={'A': 0, 'R': 0,
                                               'G': 0, 'L': 1})
        self._assert_marginal(marginals, 'J', [0.818182, 0.181818])
        self._assert_marginal(marginals, 'Q', [0.772727, 0.227273])

    def test_max_marginal(self):
        value = self.bayesian_inference.max_marginal()
        np_test.assert_almost_equal(value, 0.1659, decimal=4)

    def test_max_marginal_var(self):
        value = self.bayesian_inference.max_marginal(['G'])
        np_test.assert_almost_equal(value, 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        value = self.bayesian_inference.max_marginal(['G', 'R'])
        np_test.assert_almost_equal(value, 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        value = self.bayesian_inference.max_marginal(['G', 'R', 'A'])
        np_test.assert_almost_equal(value, 0.3260, decimal=4)

    def test_map_query(self):
        assignment = self.bayesian_inference.map_query()
        expected = {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}
        self.assertDictEqual(assignment, expected)

    def test_map_query_with_evidence(self):
        assignment = self.bayesian_inference.map_query(
            ['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(assignment, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        order = ['G', 'Q', 'A', 'J', 'L', 'R']
        graph = self.bayesian_inference.induced_graph(order)
        # Normalise edge direction and ordering before comparing.
        result_edges = sorted(sorted(edge) for edge in graph.edges())
        expected_edges = [['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']]
        self.assertEqual(expected_edges, result_edges)

    def test_induced_width(self):
        order = ['G', 'Q', 'A', 'J', 'L', 'R']
        self.assertEqual(2, self.bayesian_inference.induced_width(order))

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
Example #38
0
    def configure(self, rf):
        # command format will be the following:
        # trainPGClassifier selfName networkStructure
        print sys.argv

        # read network structure and make graph
        # labels in networkStructure identical to model names
        # networkStructure as a string containing a list of tuples

        # selfName = 'actionPGN'
        # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]"

        selfName = sys.argv[1]
        netStructureString = sys.argv[2]

        netStructure = ast.literal_eval(netStructureString)
        print netStructure

        # collect all model names in a list to extract a unique set
        modelList = []
        for k in netStructure:
            modelList += list(k)
        print list(set(modelList))

        # create a port to connect to /sam/rpc:i to query model path for each model name
        portsList = []
        querySupervisorPort = yarp.RpcClient()
        querySupervisorPortName = '/sam/' + selfName + '/queryRpc'
        querySupervisorPort.open(querySupervisorPortName)

        portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort})
        yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i')
        # ---------------------------------------------------------------------------------------------------------------
        modelDict = dict()
        failFlag = False
        for j in modelList:
            if j != selfName:
                modNameSplit = j.split(' ')
                cmd = yarp.Bottle()
                cmd.addString('dataDir')
                for l in modNameSplit:
                    cmd.addString(l)
                reply = yarp.Bottle()
                querySupervisorPort.write(cmd, reply)
                if reply.get(0).asString() != 'nack':
                    modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(), 'pickleData': None}
                    # try:
                    # load pickle for the model file
                    currPickle = pickle.load(open(reply.get(1).asString(), 'rb'))
                    # try loading labelComparisonDict from the pickle
                    if 'labelComparisonDict' in currPickle.keys():
                        modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict']
                        print j, 'labelComparisonDict loaded'
                    else:
                        print modNameSplit[0], 'labelComparisonDict not found'
                        failFlag = True

                    if 'overallPerformanceLabels' in currPickle.keys():
                        modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels']
                        print j, 'overallPerformanceLabels loaded'
                    else:
                        print j, 'overallPerformanceLabels not found'
                        failFlag = True
                    # except:
                    #     failFlag = True
                else:
                    failFlag = True

        print 'FAIL?', failFlag
        if failFlag:
            return False

        modelList = modelDict.keys()
        print modelList

        # ---------------------------------------------------------------------------------------------------------------

        # extract unique lists from the collected data
        # the unique list of pickleData[original] represents the possibleClassifications for each model
        modelDict[selfName] = dict()
        modelDict[selfName]['labels'] = []
        selfModelCol = 1

        for j in modelList:
            modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])])
            print j, 'unique labels:', modelDict[j]['labels']
            print j, 'CPD shape', modelDict[j]['CPD'].shape

            modelDict[selfName]['labels'] += modelDict[j]['labels']
            selfModelCol *= len(modelDict[j]['labels'])
            print

        # the possibleClassifications for both models (outputs of the PGN)
        # are the unique list of the model specific labels for all models
        modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels']))
        modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original']
        modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol])
        print selfName, 'unique labels:', modelDict[selfName]['labels']
        print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape

        # check that original classifications of both are identical
        # otherwise cannot combine them with a single node.
        # This is currently a big limitation that will be removed later
        print modelDict[selfName]['labels']
        for j in modelList:
            print j,
            for k in range(len(modelDict[j]['pickleData']['original'])):
                print modelDict[j]['pickleData']['original'][k]
                if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']:
                    modelDict[j]['pickleData']['original'][k] = 'unknown'

        for j in modelList:
            if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']:
                failFlag = True
                print 'original classifications of', j, 'are not identical to those of', selfName

        if failFlag:
            return False

        # Update netStructureString to reflect changes in the modelList names
        strSections = netStructureString.split("'")
        for k in range(len(strSections)):
            if len(strSections[k]) > 2 and ',' not in strSections[k]:
                strSections[k] = strSections[k].split(' ')[0]
        netStructureString = "'".join(strSections)
        netStructure = ast.literal_eval(netStructureString)
        # ---------------------------------------------------------------------------------------------------------------
        # iterate through actual labels
        # for each actual label, iterate through models
        # for each model find classification label of this model for current actual label
        # get the index of the current classification and add it to its CPD
        # also calculate which item in the joint CPD needs to be incremented

        for j in range(len(modelDict[selfName]['actualLabels'])):
            currActualLabel = modelDict[selfName]['actualLabels'][j]
            row = modelDict[selfName]['labels'].index(currActualLabel)

            colVar = np.zeros([len(modelList)])
            for k in range(len(modelList)):
                cmod = modelList[k]
                if k != 0:
                    pmod = modelList[k-1]
                    colVar *= len(modelDict[pmod]['labels'])

                colVar[k] = modelDict[cmod]['labels'].index(
                                   modelDict[cmod]['pickleData']['results'][j])
                modelDict[cmod]['CPD'][0, colVar[k]] += 1

            col = sum(colVar)
            modelDict[selfName]['CPD'][row, col] += 1

        # take all CPD's and normalise the matrices
        evidenceCard = copy.deepcopy(modelList)
        for j in modelDict:
            if j == selfName:
                # this is a joint CPD matrix
                # normalise columns to have sum = 1
                modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1')
            else:
                # normalise sum of matrix = 1
                modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD'])
                evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels'])
            print modelDict[j]['CPD']

        model = BayesianModel(netStructure)

        # create TabularCPD data structure to nest calculated CPD
        for j in modelDict:
            if j == selfName:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'],
                                                       evidence=modelList,
                                                       evidence_card=evidenceCard)
            else:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j,
                                                       variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'])

        # Associating the CPDs with the network
        for j in modelDict:
            model.add_cpds(modelDict[j]['cpdObject'])

        # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
        # defined and sum to 1.
        if not model.check_model():
            print 'Model check returned unsuccessful'
            return False

        infer = VariableElimination(model)
        confMatrix = np.zeros(len(modelDict[selfName]['labels']))
        # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased
        for j in range(len(modelDict[selfName]['actualLabels'])):
            currEvidenceDict = dict()
            for k in modelList:
                currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j])

            q = infer.query([selfName], currEvidenceDict)

            inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)]
            actualClass = modelDict[selfName]['actualLabels'][j]
            confMatrix[modelDict[selfName].index(actualClass), modelDict[selfName].index(inferenceClass)] += 1

        print "%Accuracy with PGN"
        dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix)

        return True
    def predict_probability(self, data):
        """
        Compute, for every row of `data`, the probability of each state of
        the model variables that are absent from `data`.

        Parameters
        ----------
        data : pandas DataFrame object
            Its column names must be variables of the model, and at least one
            model variable must be left out.

        Returns
        -------
        pandas DataFrame
            Indexed like `data`, with one column per state of each missing
            variable, named ``<variable>_<state>``.

        Raises
        ------
        ValueError
            If `data` contains every model variable, or a column that is not
            a model variable.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> train_data = values[:80]
        >>> predict_data = values[80:]
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> predict_data = predict_data.copy()
        >>> predict_data.drop('B', axis=1, inplace=True)
        >>> y_prob = model.predict_probability(predict_data)
        >>> y_prob
            B_0         B_1
        80  0.439178    0.560822
        81  0.581970    0.418030
        82  0.488275    0.511725
        83  0.581970    0.418030
        84  0.510794    0.489206
        85  0.439178    0.560822
        86  0.439178    0.560822
        87  0.417124    0.582876
        88  0.407978    0.592022
        89  0.429905    0.570095
        90  0.581970    0.418030
        91  0.407978    0.592022
        92  0.429905    0.570095
        93  0.429905    0.570095
        94  0.439178    0.560822
        95  0.407978    0.592022
        96  0.559904    0.440096
        97  0.417124    0.582876
        98  0.488275    0.511725
        99  0.407978    0.592022
        """
        from pgmpy.inference import VariableElimination

        given_columns = set(data.columns)
        model_nodes = set(self.nodes())

        if given_columns == model_nodes:
            raise ValueError("No variable missing in data. Nothing to predict")
        if given_columns - model_nodes:
            raise ValueError("Data has variables which are not in the model")

        missing_variables = model_nodes - given_columns
        engine = VariableElimination(self)

        # column name -> probabilities, one entry appended per row of `data`
        pred_values = defaultdict(list)
        for _, row in data.iterrows():
            posteriors = engine.query(variables=missing_variables,
                                      evidence=row.to_dict())
            for var, factor in posteriors.items():
                for position, probability in enumerate(factor.values):
                    state = self.get_cpds(var).state_names[var][position]
                    column = var + '_' + str(state)
                    pred_values[column].append(probability)

        return pd.DataFrame(pred_values, index=data.index)
Example #40
0
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
from pgmpy.factors import TabularCPD
# Restaurant example: cost depends on location and quality, and the number of
# people depends on cost and location.
restaurant = BayesianModel([('location', 'cost'),
                            ('quality', 'cost'),
                            ('cost', 'no_of_people'),
                            ('location', 'no_of_people')])

# Priors are written as one row per state (a single column), the layout that
# is valid both for older and for current TabularCPD implementations.
cpd_location = TabularCPD('location', 2, [[0.6], [0.4]])
cpd_quality = TabularCPD('quality', 3, [[0.3], [0.5], [0.2]])

# Each column is one (location, quality) combination and has to sum to 1.
# NOTE(review): the second column was (0.6, 0.1), which sums to 0.7; 0.4 is
# assumed to be the intended complement -- confirm against the source data.
cpd_cost = TabularCPD('cost', 2,
                      [[0.8, 0.6, 0.1, 0.6, 0.6, 0.05],
                       [0.2, 0.4, 0.9, 0.4, 0.4, 0.95]],
                      ['location', 'quality'], [2, 3])
cpd_no_of_people = TabularCPD('no_of_people', 2,
                              [[0.6, 0.8, 0.1, 0.6],
                               [0.4, 0.2, 0.9, 0.4]],
                              ['cost', 'location'], [2, 2])
restaurant.add_cpds(cpd_location, cpd_quality,
                    cpd_cost, cpd_no_of_people)

# Creating the inference object of the model
restaurant_inference = VariableElimination(restaurant)

# Doing simple queries over one or multiple variables. The results are not
# stored: the calls are meant to be run interactively.
restaurant_inference.query(variables=['location'])
restaurant_inference.query(variables=['location', 'no_of_people'])

# The order in which variables are eliminated can be given explicitly; when
# it is left out, pgmpy chooses an elimination order itself.
restaurant_inference.query(variables=['no_of_people'],
                           elimination_order=['location', 'cost', 'quality'])
class StateNameDecorator(unittest.TestCase):
    """Tests that factors, CPDs and inference accept state names.

    Each operation is exercised twice: once addressing states by name and
    once by plain index. Both spellings are expected to give the same result.
    """

    @staticmethod
    def _grade_values(columns):
        """Return the 3-row 'grade' table (0.1 / 0.1 / 0.8) with `columns` columns."""
        return [[0.1] * columns, [0.1] * columns, [0.8] * columns]

    def _speed_factor(self):
        """Return a fresh all-ones speed/switch/time factor carrying `self.sn1`."""
        return DiscreteFactor(['speed', 'switch', 'time'],
                              [3, 2, 2], np.ones(12), state_names=self.sn1)

    def _grade_cpd(self):
        """Return a fresh P(grade | diff, intel) CPD carrying `self.sn2`."""
        return TabularCPD('grade', 3, self._grade_values(6),
                          evidence=['diff', 'intel'], evidence_card=[2, 3],
                          state_names=self.sn2)

    def setUp(self):
        """Create the state-name maps, factors, CPDs and inference objects."""
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}

        # phi1 / cpd1 are built without state names, phi2 / cpd2 with them.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = self._speed_factor()

        self.cpd1 = TabularCPD('grade', 3, self._grade_values(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = self._grade_cpd()

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, self._grade_values(4),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    def test_assignment_statename(self):
        """`assignment` reports names when the factor has them, indices otherwise."""
        named = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                 [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        indexed = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), indexed)
        self.assertEqual(self.phi2.assignment([1, 2]), named)

    def test_factor_reduce_statename(self):
        """Reducing a factor works by name and by index, in place and as a copy."""
        assignments = ([('speed', 'medium'), ('time', 'day')],
                       [('speed', 1), ('time', 0)])
        for assignment in assignments:
            for inplace in (True, False):
                phi = self._speed_factor()
                if inplace:
                    # Default call: the factor itself is modified.
                    phi.reduce(list(assignment))
                else:
                    phi = phi.reduce(list(assignment), inplace=False)
                self.assertEqual(phi.variables, ['switch'])
                self.assertEqual(phi.cardinality, [2])
                np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        """Reducing a CPD works by name and by index, in place and as a copy."""
        expected = np.array(self._grade_values(3))
        for inplace in (True, False):
            for evidence in ([('diff', 'high')], [('diff', 0)]):
                cpd = self._grade_cpd()
                if inplace:
                    # Default call: the CPD itself is modified.
                    cpd.reduce(list(evidence))
                else:
                    cpd = cpd.reduce(list(evidence), inplace=False)
                self.assertEqual(cpd.variable, 'grade')
                self.assertEqual(cpd.variables, ['grade', 'intel'])
                np_test.assert_array_equal(cpd.get_values(), expected)

    def test_inference_query_statename(self):
        """Queries give the same answer for named and indexed evidence."""
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # The index-based result is checked against the expectation as well;
        # previously inf_op1 was simply compared a second time.
        self.assertEqual(inf_op2, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op2, req_op)
Example #42
0
class InputAgent:

	def __init__(self,k_output):
		self.VEbysmodel1 = 0
		self.input = 0
		self.info1 = {}
		self.bys1_input = 0
		self.keyarray = k_output
		self.beta_input = 0
		self.beta_input_list = []
		self.bys1_beta_input = 0

	def generate_input(self):
		self.input = random.randint(0, 4)
		print("input ",self.input)

	def bys1_init(self):
		"""Build the Bayesian network and store its inference engine.

		Each of the nodes MD, MU, MR, ML has two parents (E<side> and
		A<side>); PR has the single parent END. All nodes are binary. The
		resulting VariableElimination object is kept in `self.VEbysmodel1`.
		"""
		def root_cpd(name):
			# Parentless binary node with the prior [0.01, 0.99].
			return tcpd(variable=name, variable_card=2,
			            values=[[0.01, 0.99]])

		def move_cpd(side):
			# M<side> conditioned on E<side> and A<side>; the table is the
			# same for all four sides.
			return tcpd(variable='M' + side, variable_card=2,
			            evidence=['E' + side, 'A' + side], evidence_card=[2, 2],
			            values=[[0.75, 0.4, 0.9, 0.9], [0.25, 0.6, 0.1, 0.1]])

		edges = []
		for side in 'DURL':
			edges.append(('E' + side, 'M' + side))
			edges.append(('A' + side, 'M' + side))
		edges.append(('END', 'PR'))
		network = bysmodel(edges)

		# EU, ED, EL, ER, AU, AD, AL, AR
		cpds = [root_cpd(prefix + side) for prefix in 'EA' for side in 'UDLR']
		# MD, MU, ML, MR
		cpds.extend(move_cpd(side) for side in 'DULR')
		cpds.append(root_cpd('END'))
		# PR takes the same state as END with probability 1.
		cpds.append(tcpd(variable='PR', variable_card=2, evidence=['END'],
		                 evidence_card=[2], values=[[1.0, 0.0], [0.0, 1.0]]))

		network.add_cpds(*cpds)

		self.VEbysmodel1 = VariableElimination(network)

	def bys1_generate(self,info):
		self.reset_info()
		self.condition_cal(info)

		VEbys1_query = self.VEbysmodel1.query(['MD', 'MU', 'ML', 'MR', 'PR'], \
								evidence=self.info1)
		max_p = -1.0
		max_p2 = -1.0
		target_move = ''
		target_move2 = ''
		counter = 0
		for key in VEbys1_query.keys():
			tempv = VEbys1_query[key].values[1]
			if counter == 0:
				if max_p < tempv:
					max_p = tempv
					target_move = key
			else:
				if max_p < tempv:
					max_p2 = max_p
					target_move2 = target_move
					max_p = tempv
					target_move = key
				elif max_p2 < tempv:
					max_p2 = tempv
					target_move2 = key

			counter += 1

		print(target_move,target_move2,max_p,max_p2)

		self.bys1_input = (self.keyarray.index(target_move),self.keyarray.index(target_move2))


	def condition_cal(self,info):
		player_cord, apple_cord, enemy_array = self.info_generate(info)

		if player_cord[0] < apple_cord[0]:
			self.info1['AR'] = 1
		if player_cord[0] > apple_cord[0]:
			self.info1['AL'] = 1
		if player_cord[1] < apple_cord[1]:
			self.info1['AD'] = 1
		if player_cord[1] > apple_cord[1]:
			self.info1['AU'] = 1

		for enemy in enemy_array:
			if (player_cord[0] == enemy[0]-1) and (player_cord[1] == enemy[1]):
				self.info1['ER'] = 1
			if (player_cord[0] == enemy[0]+1) and (player_cord[1] == enemy[1]):
				self.info1['EL'] = 1
			if (player_cord[1] == enemy[1]-1) and (player_cord[0] == enemy[0]):
				self.info1['ED'] = 1
			if (player_cord[1] == enemy[1]+1) and (player_cord[0] == enemy[0]):
				self.info1['EU'] = 1

		if info['dead']:
			self.info1['END'] = 1

	def reset_info(self):
		self.info1['EU'] = 0
		self.info1['ED'] = 0
		self.info1['ER'] = 0
		self.info1['EL'] = 0
		self.info1['AU'] = 0
		self.info1['AD'] = 0
		self.info1['AL'] = 0
		self.info1['AR'] = 0
		self.info1['END'] = 0

	def info_generate(self,info):
		enemy_array = []
		player = info['player']
		player_cord = (player.x[0],player.y[0])
		for i in range(1,player.length):
			enemy_array.append((player.x[i],player.y[i]))
		for wall in info['wall']:
			enemy_array.append(wall)
		apple_cord = info['apple']

		return (player_cord,apple_cord,enemy_array)

	def info_generate_withp(self,info):
		return (self.info_generate(info),info['player'])

	def beta_generate(self,info,steps):
		self.beta_input_list.clear()

		temp_info = info.copy()
		right_score = self.beta_recursion(temp_info, steps, 0)
		left_score = self.beta_recursion(temp_info, steps, 1)
		up_score = self.beta_recursion(temp_info, steps, 2)
		down_score = self.beta_recursion(temp_info, steps, 3)

		diff = 2
		score_lsit = []
		score_lsit.append(right_score)
		score_lsit.append(left_score)
		score_lsit.append(up_score)
		score_lsit.append(down_score)

		max = -65525
		counter = 0
		target = 0
		for score in score_lsit:
			if score > max:
				max = score
				target = counter
			counter += 1

		self.beta_input = target

		counter = 0
		for score in score_lsit:
			print(score,end="")
			if abs(max-score) <= diff:
				self.beta_input_list.append(counter)
			counter += 1

		print(self.beta_input_list)

	def beta_recursion(self,info,steps,player_move):
		(player_cord, apple_cord, enemy_array), player = self.info_generate_withp(info)
		total_score = 0
		for enemy in enemy_array:
			if self.isCollission(player_cord, enemy):
				return -40
		if self.isCollission(player_cord, apple_cord):
			total_score += 40
		if steps == 0:
			return total_score+20
		else:
			steps -= 1
			player_copy = player.copyself()
			up_score = 0
			down_score = 0
			left_score = 0
			right_score = 0
			if player_move == 0:
				if player_copy.direction != 1:
					player_copy.moveRight()
				else:
					return -40
			elif player_move == 1:
				if player_copy.direction != 0:
					player_copy.moveLeft()
				else:
					return -40
			elif player_move == 2:
				if player_copy.direction != 3:
					player_copy.moveUp()
				else:
					return -40
			elif player_move == 3:
				if player_copy.direction != 2:
					player_copy.moveDown()
				else:
					return -40

			player_copy.update()
			temp_info = info.copy()
			temp_info['player'] = player_copy
			right_score = self.beta_recursion(temp_info, steps, 0)
			left_score = self.beta_recursion(temp_info, steps, 1)
			up_score = self.beta_recursion(temp_info, steps, 2)
			down_score = self.beta_recursion(temp_info, steps, 3)

			total_score += ((right_score+left_score+up_score+down_score)//4)

			return total_score

	def isCollission(self,cord1,cord2):
		if cord1[0] == cord2[0]:
			if cord1[1] == cord2[1]:
				return True
		return False

	def combine_bys1_beta(self,info,steps):
		self.bys1_generate(info)
		self.beta_generate(info,steps)

		if (self.bys1_input[0] in self.beta_input_list):
			self.bys1_beta_input = self.bys1_input[0]
		elif (self.bys1_input[1] in self.beta_input_list):
			self.bys1_beta_input = self.bys1_input[1]
		else:
			self.bys1_beta_input = self.beta_input