Example #1
from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD
from pgmpy.inference import VariableElimination


def main():
    # Defining the network structure
    model = BayesianModel([('C', 'H'), ('P', 'H')])

    # H: host
    # P: prize
    # C: contestant

    # Defining the CPDs:
    cpd_c = TabularCPD('C', 3, [[0.33, 0.33, 0.33]])
    cpd_p = TabularCPD('P', 3, [[0.33, 0.33, 0.33]])
    cpd_h = TabularCPD('H', 3, [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                                [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                                [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]],
                       evidence=['C', 'P'], evidence_card=[3, 3])

    # Associating the CPDs with the network structure.
    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # Some other methods
    # model.get_cpds()

    # check_model() checks the model structure and the associated CPDs and
    # returns True if everything is correct; otherwise it raises an exception
    # print(model.check_model())

    # Inferring the posterior probability
    infer = VariableElimination(model)
    posterior_p = infer.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior_p['H'])
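    # Sanity check (a sketch added for illustration; not part of the original
    # snippet, and assumes the dict-style query return used above): with the
    # contestant on door 0 and the prize behind door 0, the host opens door 1
    # or door 2 with equal probability, so the posterior over H is [0, 0.5, 0.5].
    import numpy as np
    post = posterior_p['H'].values
    assert np.allclose(post / post.sum(), [0.0, 0.5, 0.5])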
Example #2
    def setUp(self):
        self.sn2 = {'grade': ['A', 'B', 'F'], 'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'], 'time': ['day', 'night']}

        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12), state_names=self.sn1)

        self.cpd1 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = Inference(student)
        self.model2 = Inference(student, state_names=self.sn2)
Example #3
    def get_model(self):
        """
        Returns the fitted Bayesian model.

        Examples
        --------
        >>> from pgmpy.readwrite import BIFReader
        >>> reader = BIFReader("bif_test.bif")
        >>> reader.get_model()
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
        """
        try:
            model = BayesianModel(self.variable_edges)
            model.name = self.network_name
            model.add_nodes_from(self.variable_names)

            tabular_cpds = []
            for var in sorted(self.variable_cpds.keys()):
                values = self.variable_cpds[var]
                cpd = TabularCPD(var, len(self.variable_states[var]), values,
                                 evidence=self.variable_parents[var],
                                 evidence_card=[len(self.variable_states[evidence_var])
                                                for evidence_var in self.variable_parents[var]])
                tabular_cpds.append(cpd)

            model.add_cpds(*tabular_cpds)
            for node, properties in self.variable_properties.items():
                for prop in properties:
                    prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                    model.node[node][prop_name] = prop_value

            return model

        except AttributeError:
            raise AttributeError('First get states of variables, edges, parents and network name')
Example #4
    def setUp(self):
        nodes = {'c': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(c) Brain Tumor',
                       'YPOS': '11935',
                       'XPOS': '15250',
                       'TYPE': 'discrete'},
                 'a': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(a) Metastatic Cancer',
                       'YPOS': '10465',
                       'XPOS': '13495',
                       'TYPE': 'discrete'},
                 'b': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(b) Serum Calcium Increase',
                       'YPOS': '11965',
                       'XPOS': '11290',
                       'TYPE': 'discrete'},
                 'e': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(e) Papilledema',
                       'YPOS': '13240',
                       'XPOS': '17305',
                       'TYPE': 'discrete'},
                 'd': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(d) Coma',
                       'YPOS': '12985',
                       'XPOS': '13960',
                       'TYPE': 'discrete'}}
        model = BayesianModel([('b', 'd'), ('a', 'b'), ('a', 'c'), ('c', 'd'), ('c', 'e')])
        cpd_distribution = {'a': {'TYPE': 'discrete', 'DPIS': np.array([[0.2, 0.8]])},
                            'e': {'TYPE': 'discrete', 'DPIS': np.array([[0.8, 0.2],
                                                                        [0.6, 0.4]]), 'CONDSET': ['c'], 'CARDINALITY': [2]},
                            'b': {'TYPE': 'discrete', 'DPIS': np.array([[0.8, 0.2],
                                                                        [0.2, 0.8]]), 'CONDSET': ['a'], 'CARDINALITY': [2]},
                            'c': {'TYPE': 'discrete', 'DPIS': np.array([[0.2, 0.8],
                                                                        [0.05, 0.95]]), 'CONDSET': ['a'], 'CARDINALITY': [2]},
                            'd': {'TYPE': 'discrete', 'DPIS': np.array([[0.8, 0.2],
                                                                        [0.9, 0.1],
                                                                        [0.7, 0.3],
                                                                        [0.05, 0.95]]), 'CONDSET': ['b', 'c'], 'CARDINALITY': [2, 2]}}

        tabular_cpds = []
        for var, values in cpd_distribution.items():
            evidence = values['CONDSET'] if 'CONDSET' in values else []
            cpd = values['DPIS']
            evidence_card = values['CARDINALITY'] if 'CARDINALITY' in values else []
            states = nodes[var]['STATES']
            cpd = TabularCPD(var, len(states), cpd,
                             evidence=evidence,
                             evidence_card=evidence_card)
            tabular_cpds.append(cpd)
        model.add_cpds(*tabular_cpds)

        for var, properties in nodes.items():
            model.node[var] = properties

        self.maxDiff = None
        self.writer = XMLBeliefNetwork.XBNWriter(model=model)
Example #5
def bayesnet_examples():
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import numpy as np
    import pandas as pd

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])
    data_train = data[: int(data.shape[0] * 0.75)]
    student_model.fit(data_train)
    student_model.get_cpds()

    data_test = data[int(0.75 * data.shape[0]): data.shape[0]]
    data_test.drop('D', axis=1, inplace=True)
    student_model.predict(data_test)

    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
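    # A possible follow-up (a sketch, not from the original snippet): query the
    # hand-specified model with variable elimination, e.g. P(G | D=1, I=1).
    # Note that the CPDs learned by student_model.fit() may need to be removed
    # first (e.g. via student_model.remove_cpds()) in some pgmpy versions.
    from pgmpy.inference import VariableElimination
    student_infer = VariableElimination(student_model)
    print(student_infer.query(['G'], evidence={'D': 1, 'I': 1}))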
Example #6
    def get_model(self):
        """
        Returns an instance of Bayesian Model or Markov Model.
        Variables are named in the pattern var_0, var_1, var_2, ..., where
        var_0 is the 0th-index variable and var_1 is the 1st-index variable.

        Returns
        -------
        model: an instance of Bayesian or Markov Model.

        Examples
        --------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_model()
        """
        if self.network_type == 'BAYES':
            model = BayesianModel(self.edges)

            tabular_cpds = []
            for cpd in self.tables:
                child_var = cpd[0]
                states = int(self.domain[child_var])
                arr = list(map(float, cpd[1]))
                values = np.array(arr)
                values = values.reshape(states, values.size // states)
                tabular_cpds.append(TabularCPD(child_var, states, values))

            model.add_cpds(*tabular_cpds)
            return model

        elif self.network_type == 'MARKOV':
            model = MarkovModel(self.edges)

            factors = []
            for table in self.tables:
                variables = table[0]
                cardinality = [int(self.domain[var]) for var in variables]
                value = list(map(float, table[1]))
                factor = DiscreteFactor(variables=variables, cardinality=cardinality, values=value)
                factors.append(factor)

            model.add_factors(*factors)
            return model
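
A possible usage sketch (not from the original source; assumes a 'TestUAI.uai' file like the one named in the docstring, declaring a BAYES network). The nodes of the returned model follow the var_0, var_1, ... naming convention described above.

from pgmpy.readwrite import UAIReader
reader = UAIReader('TestUAI.uai')   # hypothetical path, taken from the docstring example
model = reader.get_model()
print(sorted(model.nodes()))        # e.g. ['var_0', 'var_1', ...]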
Example #7
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Returns
        -------
        model: an instance of BayesianModel.

        Examples
        --------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get('type') == "BayesianNetwork":
            model = BayesianModel(self.probnet['edges'].keys())

            tabular_cpds = []
            cpds = self.probnet['Potentials']
            for cpd in cpds:
                var = list(cpd['Variables'].keys())[0]
                states = self.probnet['Variables'][var]['States']
                evidence = cpd['Variables'][var]
                evidence_card = [len(self.probnet['Variables'][evidence_var]['States'])
                                 for evidence_var in evidence]
                arr = list(map(float, cpd['Values'].split()))
                values = np.array(arr)
                values = values.reshape((len(states), values.size//len(states)))
                tabular_cpds.append(TabularCPD(var, len(states), values, evidence, evidence_card))

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                for prop_name, prop_value in self.probnet['Variables'][var].items():
                    model.node[var][prop_name] = prop_value

            edges = model.edges()
            for edge in edges:
                for prop_name, prop_value in self.probnet['edges'][edge].items():
                    model.edge[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only Bayesian Network.")
Example #8
    def get_model(self):
        model = BayesianModel(self.get_edges())
        model.name = self.network_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            cpd = TabularCPD(var, len(self.variable_states[var]), values,
                             evidence=self.variable_parents[var],
                             evidence_card=[len(self.variable_states[evidence_var])
                                            for evidence_var in self.variable_parents[var]])
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for node, properties in self.variable_property.items():
            for prop in properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                model.node[node][prop_name] = prop_value

        return model
Example #9
    def get_model(self):
        """
        Returns an instance of Bayesian Model.
        """
        model = BayesianModel(self.edges)
        model.name = self.model_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            evidence = values['CONDSET'] if 'CONDSET' in values else []
            cpd = values['DPIS']
            evidence_card = values['CARDINALITY'] if 'CARDINALITY' in values else []
            states = self.variables[var]['STATES']
            cpd = TabularCPD(var, len(states), cpd,
                             evidence=evidence,
                             evidence_card=evidence_card)
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for var, properties in self.variables.items():
            model.node[var] = properties

        return model
Example #10
from pgmpy.models import BayesianModel
from pgmpy.inference import ClusterBeliefPropagation as CBP
from pgmpy.factors import TabularCPD
restaurant_model = BayesianModel([('location', 'cost'),
                                  ('quality', 'cost'),
                                  ('location', 'no_of_people'),
                                  ('cost', 'no_of_people')])
cpd_location = TabularCPD('location', 2, [[0.6, 0.4]])
cpd_quality = TabularCPD('quality', 3, [[0.3, 0.5, 0.2]])
cpd_cost = TabularCPD('cost', 2,
                      [[0.8, 0.6, 0.1, 0.6, 0.6, 0.05],
                       [0.2, 0.1, 0.9, 0.4, 0.4, 0.95]],
                      ['location', 'quality'], [2, 3])
cpd_no_of_people = TabularCPD('no_of_people', 2,
                              [[0.6, 0.8, 0.1, 0.6],
                               [0.4, 0.2, 0.9, 0.4]],
                              ['cost', 'location'], [2, 2])
restaurant_model.add_cpds(cpd_location, cpd_quality,
                          cpd_cost, cpd_no_of_people)
cluster_inference = CBP(restaurant_model)
cluster_inference.query(variables=['cost'])
cluster_inference.query(variables=['cost'],
                        evidence={'no_of_people': 1, 'quality': 0})
Example #11
    def configure(self, rf):
        # command format will be the following:
        # trainPGClassifier selfName networkStructure
        print sys.argv

        # read network structure and make graph
        # labels in networkStructure identical to model names
        # networkStructure as a string containing a list of tuples

        # selfName = 'actionPGN'
        # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]"

        selfName = sys.argv[1]
        netStructureString = sys.argv[2]

        netStructure = ast.literal_eval(netStructureString)
        print netStructure

        # collect all model names in a list to extract a unique set
        modelList = []
        for k in netStructure:
            modelList += list(k)
        print list(set(modelList))

        # create a port to connect to /sam/rpc:i to query model path for each model name
        portsList = []
        querySupervisorPort = yarp.RpcClient()
        querySupervisorPortName = '/sam/' + selfName + '/queryRpc'
        querySupervisorPort.open(querySupervisorPortName)

        portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort})
        yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i')
        # ---------------------------------------------------------------------------------------------------------------
        modelDict = dict()
        failFlag = False
        for j in modelList:
            if j != selfName:
                modNameSplit = j.split(' ')
                cmd = yarp.Bottle()
                cmd.addString('dataDir')
                for l in modNameSplit:
                    cmd.addString(l)
                reply = yarp.Bottle()
                querySupervisorPort.write(cmd, reply)
                if reply.get(0).asString() != 'nack':
                    modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(), 'pickleData': None}
                    # try:
                    # load pickle for the model file
                    currPickle = pickle.load(open(reply.get(1).asString(), 'rb'))
                    # try loading labelComparisonDict from the pickle
                    if 'labelComparisonDict' in currPickle.keys():
                        modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict']
                        print j, 'labelComparisonDict loaded'
                    else:
                        print modNameSplit[0], 'labelComparisonDict not found'
                        failFlag = True

                    if 'overallPerformanceLabels' in currPickle.keys():
                        modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels']
                        print j, 'overallPerformanceLabels loaded'
                    else:
                        print j, 'overallPerformanceLabels not found'
                        failFlag = True
                    # except:
                    #     failFlag = True
                else:
                    failFlag = True

        print 'FAIL?', failFlag
        if failFlag:
            return False

        modelList = modelDict.keys()
        print modelList

        # ---------------------------------------------------------------------------------------------------------------

        # extract unique lists from the collected data
        # the unique list of pickleData[original] represents the possibleClassifications for each model
        modelDict[selfName] = dict()
        modelDict[selfName]['labels'] = []
        selfModelCol = 1

        for j in modelList:
            modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])])
            print j, 'unique labels:', modelDict[j]['labels']
            print j, 'CPD shape', modelDict[j]['CPD'].shape

            modelDict[selfName]['labels'] += modelDict[j]['labels']
            selfModelCol *= len(modelDict[j]['labels'])
            print

        # the possibleClassifications for both models (outputs of the PGN)
        # are the unique list of the model specific labels for all models
        modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels']))
        modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original']
        modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol])
        print selfName, 'unique labels:', modelDict[selfName]['labels']
        print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape

        # check that original classifications of both are identical
        # otherwise cannot combine them with a single node.
        # This is currently a big limitation that will be removed later
        print modelDict[selfName]['labels']
        for j in modelList:
            print j,
            for k in range(len(modelDict[j]['pickleData']['original'])):
                print modelDict[j]['pickleData']['original'][k]
                if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']:
                    modelDict[j]['pickleData']['original'][k] = 'unknown'

        for j in modelList:
            if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']:
                failFlag = True
                print 'original classifications of', j, 'are not identical to those of', selfName

        if failFlag:
            return False

        # Update netStructureString to reflect changes in the modelList names
        strSections = netStructureString.split("'")
        for k in range(len(strSections)):
            if len(strSections[k]) > 2 and ',' not in strSections[k]:
                strSections[k] = strSections[k].split(' ')[0]
        netStructureString = "'".join(strSections)
        netStructure = ast.literal_eval(netStructureString)
        # ---------------------------------------------------------------------------------------------------------------
        # iterate through actual labels
        # for each actual label, iterate through models
        # for each model find classification label of this model for current actual label
        # get the index of the current classification and add it to its CPD
        # also calculate which item in the joint CPD needs to be incremented

        for j in range(len(modelDict[selfName]['actualLabels'])):
            currActualLabel = modelDict[selfName]['actualLabels'][j]
            row = modelDict[selfName]['labels'].index(currActualLabel)

            colVar = np.zeros([len(modelList)], dtype=int)
            for k in range(len(modelList)):
                cmod = modelList[k]
                if k != 0:
                    pmod = modelList[k-1]
                    colVar *= len(modelDict[pmod]['labels'])

                colVar[k] = modelDict[cmod]['labels'].index(
                                   modelDict[cmod]['pickleData']['results'][j])
                modelDict[cmod]['CPD'][0, colVar[k]] += 1

            col = sum(colVar)
            modelDict[selfName]['CPD'][row, col] += 1

        # take all CPD's and normalise the matrices
        evidenceCard = copy.deepcopy(modelList)
        for j in modelDict:
            if j == selfName:
                # this is a joint CPD matrix
                # normalise columns to have sum = 1
                modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1')
            else:
                # normalise sum of matrix = 1
                modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD'])
                evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels'])
            print modelDict[j]['CPD']

        model = BayesianModel(netStructure)

        # create TabularCPD data structure to nest calculated CPD
        for j in modelDict:
            if j == selfName:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'],
                                                       evidence=modelList,
                                                       evidence_card=evidenceCard)
            else:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j,
                                                       variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'])

        # Associating the CPDs with the network
        for j in modelDict:
            model.add_cpds(modelDict[j]['cpdObject'])

        # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
        # defined and sum to 1.
        if not model.check_model():
            print 'Model check returned unsuccessful'
            return False

        infer = VariableElimination(model)
        confMatrix = np.zeros((len(modelDict[selfName]['labels']), len(modelDict[selfName]['labels'])))
        # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased
        for j in range(len(modelDict[selfName]['actualLabels'])):
            currEvidenceDict = dict()
            for k in modelList:
                currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j])

            q = infer.query([selfName], currEvidenceDict)

            inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)]
            actualClass = modelDict[selfName]['actualLabels'][j]
            confMatrix[modelDict[selfName]['labels'].index(actualClass), modelDict[selfName]['labels'].index(inferenceClass)] += 1

        print "%Accuracy with PGN"
        dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix)

        return True
Example #12
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
cpd_xray = TabularCPD(variable='Xray',
                      variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'],
                      evidence_card=[2])
cpd_dysp = TabularCPD(variable='Dyspnoea',
                      variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'],
                      evidence_card=[2])

# Associating the parameters with the model structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)
print('Model generated by adding conditional probability distributions (CPDs)')

# Checking if the cpds are valid for the model.
print('Checking for Correctness of model : ', end='')
print(cancer_model.check_model())

print('All local independencies are as follows')
print(cancer_model.get_independencies())

print('Displaying CPDs')
print(cancer_model.get_cpds('Pollution'))
print(cancer_model.get_cpds('Smoker'))
print(cancer_model.get_cpds('Cancer'))
print(cancer_model.get_cpds('Xray'))
print(cancer_model.get_cpds('Dyspnoea'))
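
A natural next step (a sketch, not part of the original script; assumes pgmpy's VariableElimination is available) is to query the validated network, for example the marginal of Cancer given evidence on the Smoker node (state indices depend on how the truncated CPDs above were defined):

from pgmpy.inference import VariableElimination
cancer_infer = VariableElimination(cancer_model)
print(cancer_infer.query(['Cancer'], evidence={'Smoker': 0}))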
Example #13
class TestXMLBIFWriterMethodsString(unittest.TestCase):
    def setUp(self):
        edges = [
            ["family-out", "dog-out"],
            ["bowel-problem", "dog-out"],
            ["family-out", "light-on"],
            ["dog-out", "hear-bark"],
        ]
        cpds = {
            "bowel-problem": np.array([[0.01], [0.99]]),
            "dog-out": np.array([[0.99, 0.01, 0.97, 0.03], [0.9, 0.1, 0.3, 0.7]]),
            "family-out": np.array([[0.15], [0.85]]),
            "hear-bark": np.array([[0.7, 0.3], [0.01, 0.99]]),
            "light-on": np.array([[0.6, 0.4], [0.05, 0.95]]),
        }
        states = {
            "bowel-problem": ["true", "false"],
            "dog-out": ["true", "false"],
            "family-out": ["true", "false"],
            "hear-bark": ["true", "false"],
            "light-on": ["true", "false"],
        }
        parents = {
            "bowel-problem": [],
            "dog-out": ["family-out", "bowel-problem"],
            "family-out": [],
            "hear-bark": ["dog-out"],
            "light-on": ["family-out"],
        }
        properties = {
            "bowel-problem": ["position = (190, 69)"],
            "dog-out": ["position = (155, 165)"],
            "family-out": ["position = (112, 69)"],
            "hear-bark": ["position = (154, 241)"],
            "light-on": ["position = (73, 165)"],
        }

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(
                var,
                len(states[var]),
                values,
                evidence=parents[var],
                evidence_card=[len(states[evidence_var]) for evidence_var in parents[var]],
            )
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        for node, node_properties in properties.items():
            for prop in node_properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split("="))
                self.model.node[node][prop_name] = prop_value

        self.writer = XMLBIFWriter(model=self.model)

    def test_file(self):
        self.expected_xml = etree.XML(
            """<BIF version="0.3">
  <NETWORK>
    <VARIABLE TYPE="nature">
      <NAME>bowel-problem</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (190, 69)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>dog-out</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (155, 165)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>family-out</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (112, 69)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>hear-bark</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (154, 241)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>light-on</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (73, 165)</PROPERTY>
    </VARIABLE>
    <DEFINITION>
      <FOR>bowel-problem</FOR>
      <TABLE>0.01 0.99 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>dog-out</FOR>
      <GIVEN>bowel-problem</GIVEN>
      <GIVEN>family-out</GIVEN>
      <TABLE>0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>family-out</FOR>
      <TABLE>0.15 0.85 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>hear-bark</FOR>
      <GIVEN>dog-out</GIVEN>
      <TABLE>0.7 0.3 0.01 0.99 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>light-on</FOR>
      <GIVEN>family-out</GIVEN>
      <TABLE>0.6 0.4 0.05 0.95 </TABLE>
    </DEFINITION>
  </NETWORK>
</BIF>"""
        )
        self.maxDiff = None
        self.writer.write_xmlbif("test_bif.xml")
        with open("test_bif.xml", "r") as myfile:
            data = myfile.read()
        self.assertEqual(str(self.writer.__str__()[:-1]), str(etree.tostring(self.expected_xml)))
        self.assertEqual(str(data), str(etree.tostring(self.expected_xml).decode("utf-8")))
Example #14
			variable_card=2,
			values=[[0.95,0.2],
					[0.05,0.8]],
			evidence=['Musicianship'],
			evidence_card=[2])

#print(rating_cpd)
#print(difficulty_cpd)
#print(musicianship_cpd)
#print(letter_cpd)
#print(exam_cpd)

print(music_model.edges())

# Add the CPDs to the model
music_model.add_cpds(difficulty_cpd, musicianship_cpd, letter_cpd, exam_cpd, rating_cpd)

#print(music_model.get_cpds())

print(music_model.check_model())

#Create object to perform inference on model
music_infer = VariableElimination(music_model)

#Probability Musicianship
m_1 = music_infer.query(variables=['Musicianship'])
print(m_1['Musicianship'])

#Probability Difficulty
d_l = music_infer.query(variables=['Difficulty'])
print(d_l['Difficulty'])
Example #15
class TestGibbsSampling(unittest.TestCase):
    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = Factor(['C', 'B'], [4, 3], [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_bayesian_model', autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_markov_model', autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables), self.bayesian_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables), self.markov_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))


    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        self.gibbs.state = None
        random_state.return_value = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)


    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual({samples[0][0].var, samples[0][1].var, samples[0][2].var}, {'diff', 'intel', 'grade'})
        self.assertEqual({samples[1][0].var, samples[1][1].var, samples[1][2].var}, {'diff', 'intel', 'grade'})


    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
Example #16
class ExactCounterfactual(object):
    """
    A class for performing Exact counterfactual inference in both the Standard and Twin Network approaches.

    N.B.: For logging time, this relies on a custom edit of pgmpy.inference.ExactInference.VariableElimination,
    where the query also returns (as a second return) the time it takes to perform factor marginalization.
    """
    def __init__(self, verbose=False, merge=False):
        """
        Initialize the class.

        Args:
            verbose: whether or not to automatically print the Twin & standard inference times.
            merge: whether or not to perform node merging.
        """
        self.verbose = verbose
        self.merge = merge

    def construct(self, causal_model=None, G=None, df=None, n_samples=20000):
        """
        Args:
            causal_model: a structural causal model object (with .G, .sample() and .ordering); if given, G and df are derived from it.
            G: a networkx graph describing the dependency relationships.
            df: a dataframe of samples from that graph, used to construct the conditional probability tables.
            n_samples: number of samples drawn from causal_model to build df.
        """
        if causal_model is None:
            assert G is not None and df is not None, "Must initialize G and df if no TwinNetwork passed."
            self.G = G
            self.df = df
        else:
            self.scm = causal_model
            self.G = causal_model.G.copy()
            samples = causal_model.sample(n_samples)
            self.df = pd.DataFrame(samples, columns=causal_model.ordering)
        self.model = None  # reset
        self.twin_model = None  # reset
        self.counterfactual_model = None  # reset
        self._compile_model()

    def _compile_model(self):
        """
        Makes a pgmpy model out of the networkx graph and parameterizes its CPDs with CPTs estimated from the sampled data.
        """
        self.model = BayesianModel(list(self.G.edges))
        self._construct_CPD()

    def create_twin_network(self, node_of_interest, observed, intervention):
        """
        Generate self.twin_model based on the current model, then merge nodes and eliminate nodes that are conditionally
        independent of the counterfactual node of interest.

        Args:
            node_of_interest: the node of interest to perform inference on.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to intervene on.
        """
        self.twin_model = self.model.copy()
        self.twin_model.add_nodes_from([
            "{}tn".format(n) for n in list(self.twin_model.nodes)
            if len(list(self.model.predecessors(n))) != 0
        ])  # add all non-noise nodes
        self.twin_model.add_edges_from([
            ("{}tn".format(pa), "{}tn".format(ch))
            for pa, ch in list(self.model.edges)
            if len(list(self.model.predecessors(pa))) != 0
        ])  # add all non-noise edges
        self.twin_model.add_edges_from([
            (pa, "{}tn".format(ch)) for pa, ch in list(self.model.edges)
            if len(list(self.model.predecessors(pa))) == 0
        ])  #add all noise edges
        # merge nodes if merge flag is true
        if self.merge:
            self.merge_nodes(node_of_interest, intervention)

        # get appropriately ordered CPTs for new merged representation
        duplicate_cpts = []
        for node in self.twin_model.nodes:
            if node[-2:] == "tn":  # if in the twin network model
                node_parents = list(self.twin_model.predecessors(node))
                non_twin_parents = [
                    pa.replace("tn", "") for pa in node_parents
                ]
                cpt = TabularCPD(
                    node, 2,
                    self.model.get_cpds(
                        node[:-2]).reorder_parents(non_twin_parents),
                    node_parents,
                    len(node_parents) * [2])
                duplicate_cpts.append(cpt)
        self.twin_model.add_cpds(*duplicate_cpts)

        # make model efficient
        modified_intervention = {
            n + "tn": intervention[n]
            for n in intervention
        }  # modify for twin network syntax
        self.intervene(modified_intervention, twin=True)
        self._eliminate_conditionally_independent(node_of_interest, observed,
                                                  intervention)

    def _construct_CPD(self, counterfactual=False, df=None):
        cpt_list = []
        if df is None:
            df = self.df
        for node in self.G.nodes:
            cpt_list.append(self._get_node_CPT(node, df))
        if counterfactual:
            self.counterfactual_model.add_cpds(*cpt_list)
        else:
            self.model.add_cpds(*cpt_list)
        self.df = None  # erase df so the object stays picklable (important for parallel processing)

    def _get_node_CPT(self, node, df=None):
        parents = list(self.G.predecessors(node))
        if len(parents) == 0:  # if root node (latent)
            mu = df[node].mean()
            return TabularCPD(node, 2, values=[[1 - mu], [mu]])
        elif len(parents) > 0:
            mus = df.groupby(parents)[node].mean().reset_index()
            uniques = mus[parents].drop_duplicates()
            parent_combos = list(product(*[[0, 1] for _ in parents]))
            appends = []
            for combo in parent_combos:
                if not (uniques == np.array(combo)
                        ).all(1).any():  # if value not enumerated in sample
                    appends.append(list(combo) +
                                   [0.5])  # add an uninformative prior
            add_df = pd.DataFrame(appends, columns=parents + [node])
            mus = pd.concat((mus, add_df), axis=0)
            mus = mus.sort_values(by=parents)
            mus = mus[node].values
            cpt = np.vstack((1. - mus, mus))
            cpt = TabularCPD(node,
                             2,
                             values=cpt,
                             evidence=parents,
                             evidence_card=len(parents) * [2])
            return cpt

    def query(self, var, observed, counterfactual=False, twin=False):
        """
        Run an arbitrary query by Variable Elimination.

        What is the analytic cost of this? You have to do K noise queries in a graph with K endog nodes + K exog
        nodes in normal CFI. In twin network inference, you have to do 1 query in a graph with 2K endog nodes + K
        exog nodes.

        Args:
            var: variable of interest, i.e. P(Var | Observed)
            observed: a dictionary of {node_name: observed_value} to condition on.
            counterfactual: if true, uses the counterfactual model. (self.counterfactual_model)
            twin: if true, uses the twin network model. (self.twin_model)

        Returns:
            The query result and the elapsed inference time (seconds).
        """
        if not isinstance(var, list):
            var = [var]
        if twin:
            # time_start = time.time()
            infer = VariableElimination(self.efficient_twin_model)
            result, time_elapsed = infer.query(var,
                                               evidence=observed,
                                               stopwatch=True)
            self.twin_inference_time = time_elapsed
        elif counterfactual:
            # time_start = time.time()
            infer = VariableElimination(self.counterfactual_model)
            result, time_elapsed = infer.query(var,
                                               evidence=observed,
                                               stopwatch=True)
            self.standard_inference_time = self.joint_inference_time + time_elapsed
        else:
            infer = VariableElimination(self.model)
            result, time_elapsed = infer.query(var,
                                               evidence=observed,
                                               stopwatch=True)
        return result, time_elapsed

    def intervene(self, intervention, counterfactual=False, twin=False):
        """
        Performs the intervention on the BN object by setting the CPT to be deterministic and removing parents.

        Args:
            intervention: a dictionary of {node_name: intervention_value} to intervene on.
        """
        cpt_list = []
        if counterfactual and not twin:
            model = self.counterfactual_model
        elif twin and not counterfactual:
            model = self.twin_model
        else:
            model = self.model
        for node in intervention:
            if node in model.nodes:
                # do-calculus graph surgery: remove edges from parents
                parent_edges = [(pa, node) for pa in model.predecessors(node)]
                model.remove_edges_from(parent_edges)
                model.remove_node("U{}".format(node))
                # set new deterministic CPT
                value = intervention[node]
                cpt = [[], []]
                cpt[value] = [1]
                cpt[int(not bool(value))] = [0]
                new_cpt = TabularCPD(node, 2, values=cpt)
                cpt_list.append(new_cpt)
        # override existing CPTs
        model.add_cpds(*cpt_list)

    def abduction(self, observed, n_samples=None):
        # infer latent joint and store the time it takes
        noise_nodes = [
            n for n in self.G.nodes if len(list(self.G.predecessors(n))) == 0
        ]
        new_joint, time_elapsed = self.query(noise_nodes, observed)
        self.joint_inference_time = time_elapsed
        new_joint = new_joint.values.ravel()
        # sample from network with new latent distribution
        ## sample from joint
        dim = 2**len(noise_nodes)
        val_idx = np.arange(dim)
        # define number of samples
        if n_samples is None:  # be careful with this!
            n_samples = min(
                [30 * 2**(len(list(self.G.nodes)) - len(noise_nodes)), 100000])
        noise_sample_idx = np.random.choice(val_idx,
                                            size=n_samples,
                                            p=new_joint)
        vals = np.array(
            list(product(*[[0, 1] for _ in range(len(noise_nodes))])))
        noise_samples = vals[noise_sample_idx]
        ## intervene in DAG
        self.scm.do(
            {n: noise_samples[:, i]
             for i, n in enumerate(noise_nodes)})
        ## sample with these interventions
        counterfactual_samples = pd.DataFrame(self.scm.sample(n_samples),
                                              columns=self.scm.ordering)
        # construct cpts with new distribution
        self.counterfactual_model = self.model.copy()
        self._construct_CPD(counterfactual=True, df=counterfactual_samples)

    def exact_abduction_prediction(self, noi, ev, intn, n_joint_samples=30000):
        # sample from exact joint distribution
        start = time.time()
        joint = self.query(self.scm._get_exog_nodes(), ev)[0]
        values = np.array(
            list(product(*[range(card) for card in joint.cardinality])))
        n_joint_samples = max([n_joint_samples, 30 * values.shape[0]])
        probabilities = joint.values.ravel()
        idx = np.random.choice(np.arange(values.shape[0]),
                               size=n_joint_samples,
                               p=probabilities)
        samples = values[idx]
        samples = {
            joint.variables[i]: samples[:, i]
            for i in range(len(joint.variables))
        }
        print(time.time() - start)
        # pass joint samples
        self.scm.do(samples)
        # format intervention
        if isinstance(intn[list(intn.keys())[0]], int):
            intn = {k: intn[k] * np.ones(n_joint_samples) for k in intn}
        self.scm.do(intn)
        # sample from the new model
        prediction = self.scm.sample(return_pandas=True)[noi]
        return prediction.mean()

    def enumerate_inference(self, noi, ev, intn, n_samples=30000):
        """
        Performs exact counterfactual inference by enumeration.
        """
        intn = {k: intn[k] * np.ones(n_samples) for k in intn}
        joint_sample, joint_prob = self.posterior_enumerate(ev)
        joint_samples = joint_sample[np.random.choice(np.arange(
            joint_sample.shape[0]),
                                                      p=joint_prob,
                                                      size=n_samples)]
        joint_samples = {
            node: joint_samples[:, i]
            for i, node in enumerate(self.scm._get_exog_nodes())
        }
        self.scm.do(joint_samples)
        self.scm.do(intn)
        prediction = self.scm.sample(return_pandas=True)[noi]
        return prediction.mean()

    def posterior_enumerate(self, evidence):
        """
        Inference via enumeration.
        """
        # set up enumeration
        exog_nodes = self.scm._get_exog_nodes()
        endog_nodes = self.scm._get_endog_nodes()
        evidence_array = np.array(
            [evidence[k] for k in endog_nodes if k in evidence])
        evidence_index = [
            i for i, v in enumerate(endog_nodes) if v in evidence
        ]
        combinations = np.array(
            list(product(*[range(2) for _ in range(len(exog_nodes))])))
        probabilities = np.array(
            [self.scm.G.nodes[node]['p'] for node in exog_nodes])
        prior = combinations * probabilities + (1 - combinations) * (
            1 - probabilities)

        def vector_compare(val_prob):
            joint_sample, prior = val_prob
            self.scm.do({
                exog_nodes[i]: joint_sample[i]
                for i in range(len(exog_nodes))
            })
            samp = self.scm.sample().flatten()
            if np.all(evidence_array == samp[evidence_index]):
                return np.prod(prior)
            else:
                return 0

        posterior = np.array(
            [i for i in map(vector_compare, zip(combinations, prior))])
        posterior = posterior / np.sum(posterior)
        return combinations, posterior

    def _generate_counterfactual_model(self,
                                       observed,
                                       intervention,
                                       n_samples=None):
        """
        Runs the standard counterfactual inference procedure and returns an intervened model with the posterior.

        Args:
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to intervene on.
        """
        self.abduction(observed, n_samples)
        self.intervene(intervention, counterfactual=True)

    def standard_counterfactual_query(self,
                                      node_of_interest,
                                      observed,
                                      intervention,
                                      n_samples_for_approx=None):
        """
        Query and sample from the counterfactual model.
        Args:
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to intervene on.
            n_samples: number of samples to draw from the counterfactual world model.
        """
        # infer latents and generate model, also initializes self.standard_inference_time
        self._generate_counterfactual_model(observed,
                                            intervention,
                                            n_samples=n_samples_for_approx)
        # then run the query
        ## for stability, pass in as evidence a deterministic value for the intervention node
        int_noise_node_values = {
            "U{}".format(k): intervention[k]
            for k in intervention
        }
        q, time_elapsed = self.query(node_of_interest,
                                     observed=int_noise_node_values,
                                     counterfactual=True)
        self.standard_inference_time = self.joint_inference_time + time_elapsed
        return q

    def merge_nodes(self, node_of_interest, intervention):
        """
        Merge nodes in the Twin Counterfactual network. In place modifies `self.twin_model`.
        Works by giving children of the node to be eliminated to its factual counterpart. Operates topologically.
        """
        # find every non-descendant of the intervention nodes
        nondescendant_sets = []
        all_nodes = set([i for i in list(self.model.nodes) if i[0] != 'U'])
        for node in intervention:
            nondescendant_sets.append(
                all_nodes.difference(set(nx.descendants(self.model, node))))
        dont_merge = [node_of_interest] + list(intervention.keys())
        shared_nondescendants = set.intersection(
            *nondescendant_sets) - set(dont_merge)
        # now modify twin network to replace all _tn variables with their regular counterpart
        ordered_nondescendants = [
            n for n in nx.topological_sort(self.model)
            if n in list(shared_nondescendants)
        ]
        for node in ordered_nondescendants:  # start with the oldest nodes
            twin_node = node + "tn"
            tn_children = self.twin_model.successors(twin_node)
            self.twin_model.add_edges_from([(node, c) for c in tn_children])
            self.twin_model.remove_node(twin_node)

    def _eliminate_conditionally_independent(self, node_of_interest, observed,
                                             intervention):
        """
        Generate an "efficient" twin network model by removing nodes that are d-separated from the node
        of interest given observed and intervened variables.

        Args:
            node_of_interest: the node of interest in the query.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to intervene on.
        """
        conditioned_on = list(observed) + list(intervention)
        self.efficient_twin_model = self.twin_model.copy()
        for node in [n for n in self.twin_model.nodes if n[-2:] == "tn"]:
            try:
                if not self.efficient_twin_model.is_active_trail(
                        node, node_of_interest + "tn",
                        observed=conditioned_on):
                    self.efficient_twin_model.remove_node(node)
            except Exception:
                # the node may already be gone from the graph (e.g. merged away); skip it
                pass

    def twin_counterfactual_query(self, node_of_interest, observed,
                                  intervention):
        """
        Query and sample from the counterfactual model.
        Args:
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to intervene on.
            n_samples: number of samples to draw from the counterfactual world model.
        """
        self.create_twin_network(node_of_interest, observed,
                                 intervention)  # then, create the twin network
        result, time_elapsed = self.query(
            node_of_interest + "tn", observed,
            twin=True)  # log time it takes to do p(Vtn | E)
        return result

    def sample(self, n_samples=1, counterfactual=False, twin=False):
        """
        Perform forward sampling from the model.

        Args:
            n_samples: the number of samples you'd like to return
        """
        if counterfactual:
            model = self.counterfactual_model
        elif twin:
            model = self.twin_model
        else:
            model = self.model
        inference = BayesianModelSampling(model)
        return inference.forward_sample(size=n_samples,
                                        return_type='dataframe')

    def compare_times(self,
                      node_of_interest,
                      observed,
                      intervention,
                      n_samples_for_approx=None):
        """
        Compare the times it takes to do inference in the standard and twin network counterfactual inference
        approaches.

        Args:
            node_of_interest: the node of interest to perform inference on.
            observed: a dictionary of {node: observed_value} to condition on.
            intervention: a dictionary of {node: intervention_value} to intervene on.
        """
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                print("A. Performing Standard Counterfactual Inference.")
                self.standard_counterfactual_query(node_of_interest, observed,
                                                   intervention,
                                                   n_samples_for_approx)
                print("B. Performing Twin Network Counterfactual Inference.")
                # first, reset the graph network
                self.scm.G = self.scm.G_original.copy()
                self.twin_counterfactual_query(node_of_interest, observed,
                                               intervention)
                if self.verbose:
                    print(self.standard_inference_time,
                          self.twin_inference_time)
                return self
        except Exception as e:
            print(e)
            print((node_of_interest, observed, intervention))
            return False  # return False bool to indicate failed experiment.
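
# A minimal usage sketch (added illustration, not part of the original source):
# assuming `cf` is an instance of the counterfactual-inference class above,
# built from the structural causal model defined earlier in this document, a
# timing comparison between the standard and twin-network approaches could be
# run as below. The variable names 'Y' and 'X' are hypothetical placeholders.
#
#     cf.compare_times(node_of_interest='Y',
#                      observed={'X': 1},
#                      intervention={'X': 0},
#                      n_samples_for_approx=1000)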
Example #17
0
def get_game_network():
    BayesNet = BayesianModel()
    BayesNet.add_node('A')
    BayesNet.add_node('B')
    BayesNet.add_node('C')
    BayesNet.add_node('AvB')
    BayesNet.add_node('BvC')
    BayesNet.add_node('CvA')
    BayesNet.add_edge('A', 'AvB')
    BayesNet.add_edge('B', 'AvB')
    BayesNet.add_edge('B', 'BvC')
    BayesNet.add_edge('C', 'BvC')
    BayesNet.add_edge('C', 'CvA')
    BayesNet.add_edge('A', 'CvA')
    cpd_a = TabularCPD('A', 4, values=[[0.15], [0.45], [0.3], [0.1]])
    cpd_b = TabularCPD('B', 4, values=[[0.15], [0.45], [0.3], [0.1]])
    cpd_c = TabularCPD('C', 4, values=[[0.15], [0.45], [0.3], [0.1]])
    cpd_avb = TabularCPD('AvB',
                         3,
                         values=[[
                             0.10, 0.20, 0.15, 0.05, 0.60, 0.10, 0.20, 0.15,
                             0.75, 0.60, 0.10, 0.20, 0.90, 0.75, 0.60, 0.10
                         ],
                                 [
                                     0.10, 0.60, 0.75, 0.90, 0.20, 0.10, 0.60,
                                     0.75, 0.15, 0.20, 0.10, 0.60, 0.05, 0.15,
                                     0.20, 0.10
                                 ],
                                 [
                                     0.80, 0.20, 0.10, 0.05, 0.20, 0.80, 0.20,
                                     0.10, 0.10, 0.20, 0.80, 0.20, 0.05, 0.10,
                                     0.20, 0.80
                                 ]],
                         evidence=['A', 'B'],
                         evidence_card=[4, 4])
    cpd_bvc = TabularCPD('BvC',
                         3,
                         values=[[
                             0.10, 0.20, 0.15, 0.05, 0.60, 0.10, 0.20, 0.15,
                             0.75, 0.60, 0.10, 0.20, 0.90, 0.75, 0.60, 0.10
                         ],
                                 [
                                     0.10, 0.60, 0.75, 0.90, 0.20, 0.10, 0.60,
                                     0.75, 0.15, 0.20, 0.10, 0.60, 0.05, 0.15,
                                     0.20, 0.10
                                 ],
                                 [
                                     0.80, 0.20, 0.10, 0.05, 0.20, 0.80, 0.20,
                                     0.10, 0.10, 0.20, 0.80, 0.20, 0.05, 0.10,
                                     0.20, 0.80
                                 ]],
                         evidence=['B', 'C'],
                         evidence_card=[4, 4])
    cpd_cva = TabularCPD('CvA',
                         3,
                         values=[[
                             0.10, 0.20, 0.15, 0.05, 0.60, 0.10, 0.20, 0.15,
                             0.75, 0.60, 0.10, 0.20, 0.90, 0.75, 0.60, 0.10
                         ],
                                 [
                                     0.10, 0.60, 0.75, 0.90, 0.20, 0.10, 0.60,
                                     0.75, 0.15, 0.20, 0.10, 0.60, 0.05, 0.15,
                                     0.20, 0.10
                                 ],
                                 [
                                     0.80, 0.20, 0.10, 0.05, 0.20, 0.80, 0.20,
                                     0.10, 0.10, 0.20, 0.80, 0.20, 0.05, 0.10,
                                     0.20, 0.80
                                 ]],
                         evidence=['C', 'A'],
                         evidence_card=[4, 4])
    BayesNet.add_cpds(cpd_a, cpd_b, cpd_c, cpd_avb, cpd_bvc, cpd_cva)
    return BayesNet
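
# A short usage sketch (added for illustration; it assumes the pgmpy imports
# used in the other examples in this document): the returned game network can
# be queried with variable elimination, e.g. for the outcome of B vs C given
# the observed results of the other two matches.
from pgmpy.inference import VariableElimination

game_net = get_game_network()
game_infer = VariableElimination(game_net)
posterior_bvc = game_infer.query(['BvC'], evidence={'AvB': 0, 'CvA': 2})
print(posterior_bvc['BvC'])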
Example #18
0
class TestXMLBIFWriterMethodsString(unittest.TestCase):
    def setUp(self):
        edges = [['family-out', 'dog-out'], ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'], ['dog-out', 'hear-bark']]
        cpds = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])
        }
        states = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false']
        }
        parents = {
            'bowel-problem': [],
            'dog-out': ['family-out', 'bowel-problem'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out']
        }
        properties = {
            'bowel-problem': ['position = (190, 69)'],
            'dog-out': ['position = (155, 165)'],
            'family-out': ['position = (112, 69)'],
            'hear-bark': ['position = (154, 241)'],
            'light-on': ['position = (73, 165)']
        }

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var,
                             len(states[var]),
                             values,
                             evidence=parents[var],
                             evidence_card=[
                                 len(states[evidence_var])
                                 for evidence_var in parents[var]
                             ])
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        for node, node_properties in properties.items():
            for prop in node_properties:
                prop_name, prop_value = map(lambda t: t.strip(),
                                            prop.split('='))
                self.model.node[node][prop_name] = prop_value

        self.writer = XMLBIFWriter(model=self.model)

    def test_file(self):
        self.expected_xml = etree.XML("""<BIF version="0.3">
  <NETWORK>
    <VARIABLE TYPE="nature">
      <NAME>bowel-problem</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (190, 69)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>dog-out</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (155, 165)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>family-out</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (112, 69)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>hear-bark</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (154, 241)</PROPERTY>
    </VARIABLE>
    <VARIABLE TYPE="nature">
      <NAME>light-on</NAME>
      <OUTCOME>0</OUTCOME>
      <OUTCOME>1</OUTCOME>
      <PROPERTY>position = (73, 165)</PROPERTY>
    </VARIABLE>
    <DEFINITION>
      <FOR>bowel-problem</FOR>
      <TABLE>0.01 0.99 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>dog-out</FOR>
      <GIVEN>bowel-problem</GIVEN>
      <GIVEN>family-out</GIVEN>
      <TABLE>0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>family-out</FOR>
      <TABLE>0.15 0.85 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>hear-bark</FOR>
      <GIVEN>dog-out</GIVEN>
      <TABLE>0.7 0.3 0.01 0.99 </TABLE>
    </DEFINITION>
    <DEFINITION>
      <FOR>light-on</FOR>
      <GIVEN>family-out</GIVEN>
      <TABLE>0.6 0.4 0.05 0.95 </TABLE>
    </DEFINITION>
  </NETWORK>
</BIF>""")
        self.maxDiff = None
        self.writer.write_xmlbif("test_bif.xml")
        with open("test_bif.xml", "r") as myfile:
            data = myfile.read()
        self.assertEqual(str(self.writer.__str__()[:-1]),
                         str(etree.tostring(self.expected_xml)))
        self.assertEqual(
            str(data), str(etree.tostring(self.expected_xml).decode('utf-8')))
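
# A possible round-trip check (added sketch, not part of the original test):
# after write_xmlbif() has produced "test_bif.xml", the file can be read back
# with pgmpy's XMLBIFReader and compared against the original model, e.g.
#
#     from pgmpy.readwrite import XMLBIFReader
#     reader = XMLBIFReader("test_bif.xml")
#     round_trip_model = reader.get_model()
#     print(sorted(round_trip_model.nodes()))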
Example #19
0
                        evidence=['X1', 'X2'],
                        evidence_card=[2, 2])

noise = 0.2
for i in range(3):
    parent = 'X{}'.format(i + 1)
    child = 'Y{}'.format(i + 1)
    CPDs[child] = TabularCPD(variable=child,
                             variable_card=2,
                             values=[[1 - noise, noise], [noise, 1 - noise]],
                             evidence=[parent],
                             evidence_card=[2])

# Make model
for cpd in CPDs.values():
    model.add_cpds(cpd)
model.check_model()

from pgmpy.inference import VariableElimination

infer = VariableElimination(model)

# Inference
evidence = {'Y1': 1, 'Y2': 0, 'Y3': 0}
marginals = {}
for i in range(3):
    name = 'X{}'.format(i + 1)
    post = infer.query([name], evidence=evidence).values
    marginals[name] = post
print(marginals)
Example #20
0
#    +---------+---------+---------+---------+---------+
#    | diff    | intel_0 | intel_0 | intel_1 | intel_1 |
#    +---------+---------+---------+---------+---------+
#    | intel   | diff_0  | diff_1  | diff_0  | diff_1  |
#    +---------+---------+---------+---------+---------+
#    | grade_0 | 0.3     | 0.05    | 0.9     | 0.5     |
#    +---------+---------+---------+---------+---------+
#    | grade_1 | 0.4     | 0.25    | 0.08    | 0.3     |
#    +---------+---------+---------+---------+---------+
#    | grade_2 | 0.3     | 0.7     | 0.02    | 0.2     |
#    +---------+---------+---------+---------+---------+

cpd_g = TabularCPD(variable='G',
                   variable_card=2,
                   values=[[0.9, 0.9, 0.8, 0.1], [0.1, 0.1, 0.2, 0.9]],
                   evidence=['F', 'B'],
                   evidence_card=[2, 2])

# Associating the CPDs with the network
model.add_cpds(cpd_b, cpd_f, cpd_g)

# check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
# defined and sum to 1.
model.check_model()
# These defined CPDs can be added to the model. Since the model already has CPDs associated with these variables, it
# will show a warning that pgmpy is replacing those CPDs with the new ones.
model.get_cpds()
print(cpd_g)
infer = VariableElimination(model)
print(infer.query(['G']))
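
# An additional illustrative query (added sketch, using the same model and
# imports as above): conditioning on one of the parents, e.g. F = 1, updates
# the posterior over G.
print(infer.query(['G'], evidence={'F': 1})['G'])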
Example #21
0
'''
Created on Sep 21, 2017

@author: Adele
'''
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import BeliefPropagation

G = BayesianModel([('diff', 'grade'), ('intel', 'grade'), ('intel', 'SAT'),
                   ('grade', 'letter')])
diff_cpd = TabularCPD('diff', 2, [[0.2], [0.8]])
intel_cpd = TabularCPD('intel', 3, [[0.5], [0.3], [0.2]])
grade_cpd = TabularCPD(
    'grade',
    3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
        [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
    evidence=['diff', 'intel'],
    evidence_card=[2, 3])
sat_cpd = TabularCPD('SAT',
                     2, [[0.1, 0.2, 0.7], [0.9, 0.8, 0.3]],
                     evidence=['intel'],
                     evidence_card=[3])
letter_cpd = TabularCPD('letter',
                        2, [[0.1, 0.4, 0.8], [0.9, 0.6, 0.2]],
                        evidence=['grade'],
                        evidence_card=[3])
G.add_cpds(diff_cpd, intel_cpd, grade_cpd, sat_cpd, letter_cpd)
bp = BeliefPropagation(G)
bp.calibrate()
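
# A brief follow-up sketch (added; assumes the same model G and the calibrated
# bp object from above): once calibrated, BeliefPropagation can answer marginal
# and MAP queries directly, with the same dict-style results used by
# VariableElimination elsewhere in these examples.
print(bp.query(['grade'], evidence={'diff': 0, 'intel': 1})['grade'])
print(bp.map_query(['letter'], evidence={'intel': 2}))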
Example #22
0
class TestBayesianModelCPD(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

    def test_active_trail_nodes(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2), evidence=['A'],
                            evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)

        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        cpd_g = TabularCPD('g', 2, 
                            np.array([[0.2, 0.3, 0.4, 0.6],
                                      [0.8, 0.7, 0.6, 0.4]]),
                                                            ['d', 'i'], [2, 2])

        cpd_s = TabularCPD('s', 2, 
                            np.array([[0.2, 0.3],
                                      [0.8, 0.7]]),
                                                ['i'], 2)

        cpd_l = TabularCPD('l', 2, 
                            np.array([[0.2, 0.3],
                                      [0.8, 0.7]]),
                                                ['g'], 2)

        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertTrue(self.G.check_model())


    def test_check_model1(self):
        cpd_g = TabularCPD('g', 2, 
                            np.array([[0.2, 0.3],
                                      [0.8, 0.7]]),
                                                 ['i'], 2)
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2, 
                            np.array([[0.2, 0.3, 0.4, 0.6],
                                      [0.8, 0.7, 0.6, 0.4]]),
                                                            ['d', 's'], [2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2, 
                            np.array([[0.2, 0.3],
                                      [0.8, 0.7]]),
                                                 ['l'], 2)
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2, 
                            np.array([[0.2, 0.3],
                                      [0.8, 0.7]]),
                                                 ['d'], 2)
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2, 
                            np.array([[0.2, 0.3, 0.4, 0.6],
                                      [0.8, 0.7, 0.6, 0.4]]),
                                                           ['d', 'i'], [2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2, 
                            np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                      [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                                                            ['g', 'd', 'i'], [2, 2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def test_check_model2(self):
        cpd_s = TabularCPD('s', 2, 
                            np.array([[0.5, 0.3],
                                      [0.8, 0.7]]),
                                                ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_s)


        cpd_g = TabularCPD('g', 2, 
                            np.array([[0.2, 0.3, 0.4, 0.6],
                                      [0.3, 0.7, 0.6, 0.4]]),
                                                            ['d', 'i'], [2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2, 
                            np.array([[0.2, 0.3],
                                      [0.1, 0.7]]),
                                                ['g'], 2)
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)


    def tearDown(self):
        del self.G
Example #23
0
                       values=[[.3, .05, .9, .5], [.4, .25, .08, .3],
                               [.3, .7, .02, .2]],
                       evidence=['I', 'D'],
                       evidence_card=[2, 2])

letter_cpd = TabularCPD(variable='L',
                        variable_card=2,
                        values=[[.1, 0.4, .99], [.9, 0.6, .01]],
                        evidence=['G'],
                        evidence_card=[3])

# building the model
student_model = BayesianModel([('D', 'G'), ('I', 'G'), ('I', 'S'), ('G', 'L')])

# adding cpds
student_model.add_cpds(difficulty_cpd, intelligence_cpd, sat_cpd, grade_cpd,
                       letter_cpd)

model_name = "student"
# json dump part
import json

data = {
    "name": "student",
    "short_description": "give short desc here",
    "long_description": "give long desc here",
    "nodes": {
        "L": "Letter",
        "D": "Difficulty",
        "G": "Grade",
        "I": "Intelligence",
        "S": "SAT Scores"
Example #24
0
class TestInferenceBase(unittest.TestCase):
    def setUp(self):
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        b_cpd = TabularCPD('b', 2, [[0.2, 0.4], [0.3, 0.4]], evidence='a',
                           evidence_card=[2])
        c_cpd = TabularCPD('c', 2, [[0.1, 0.2], [0.3, 0.4]], evidence='b',
                           evidence_card=[2])
        d_cpd = TabularCPD('d', 2, [[0.4, 0.3], [0.2, 0.1]], evidence='c',
                           evidence_card=[2])
        e_cpd = TabularCPD('e', 2, [[0.3, 0.2], [0.4, 0.1]], evidence='d',
                           evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'), ('c', 'd')])
        factor_1 = Factor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100]))
        factor_2 = Factor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20]))
        factor_3 = Factor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1]))
        factor_4 = Factor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)

    def test_bayesian_inference_init(self):
        infer_bayesian = Inference(self.bayesian)
        self.assertEqual(set(infer_bayesian.variables), {'a', 'b', 'c', 'd', 'e'})
        self.assertEqual(infer_bayesian.cardinality, {'a': 2, 'b': 2, 'c': 2,
                                                      'd': 2, 'e': 2})
        self.assertIsInstance(infer_bayesian.factors, defaultdict)
        self.assertEqual(set(infer_bayesian.factors['a']),
                         set([self.bayesian.get_cpds('a').to_factor(),
                              self.bayesian.get_cpds('b').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['b']),
                         set([self.bayesian.get_cpds('b').to_factor(),
                              self.bayesian.get_cpds('c').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['c']),
                         set([self.bayesian.get_cpds('c').to_factor(),
                              self.bayesian.get_cpds('d').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['d']),
                         set([self.bayesian.get_cpds('d').to_factor(),
                              self.bayesian.get_cpds('e').to_factor()]))
        self.assertEqual(set(infer_bayesian.factors['e']),
                         set([self.bayesian.get_cpds('e').to_factor()]))

    def test_markov_inference_init(self):
        infer_markov = Inference(self.markov)
        self.assertEqual(set(infer_markov.variables), {'a', 'b', 'c', 'd'})
        self.assertEqual(infer_markov.cardinality, {'a': 2, 'b': 2, 'c': 2, 'd': 2})
        self.assertEqual(infer_markov.factors, {'a': [Factor(['a', 'b'], [2, 2],
                                                             np.array([100, 1, 1, 100])),
                                                      Factor(['a', 'c'], [2, 2],
                                                             np.array([40, 30, 100, 20]))],
                                                'b': [Factor(['a', 'b'], [2, 2],
                                                             np.array([100, 1, 1, 100])),
                                                      Factor(['b', 'd'], [2, 2],
                                                             np.array([1, 100, 100, 1]))],
                                                'c': [Factor(['a', 'c'], [2, 2],
                                                             np.array([40, 30, 100, 20])),
                                                      Factor(['c', 'd'], [2, 2],
                                                             np.array([60, 60, 40, 40]))],
                                                'd': [Factor(['b', 'd'], [2, 2],
                                                             np.array([1, 100, 100, 1])),
                                                      Factor(['c', 'd'], [2, 2],
                                                             np.array([60, 60, 40, 40]))]})
Example #25
0
def factorise(state, graph):
    state_names = [n.name for n in state.dom.names]
    graph_nodes = graph.get_nodes()
    graph_names = [n.get_name() for n in graph_nodes]
    l = len(state_names)
    if l != len(graph_names):
        raise Exception('Missing domain names of state in factorisation')
    if set(state_names) != set(graph_names):
        raise Exception('Non-matching graph and state names in factorisation')
    # make dictionary of names and corresponding masks
    masks = {}
    for i in range(l):
        ls = l * [0]
        ls[i] = 1
        masks[state_names[i]] = ls
    # dictionary to be filled with cpts = conditional probability tables
    model = BayesianModel()
    model.add_nodes_from(graph_names)
    for node in graph_nodes:
        parents = get_parents(node)
        key = node.get_name()
        mask_cod = masks[key]
        if len(parents) == 0:
            # marginalise for initial nodes
            initial_state = state % mask_cod
            if len(initial_state.dom) > 1:
                raise Exception('Initial states must have dimension 1')
            dom_card = len(initial_state.dom[0])
            state_array = initial_state.array
            cpd_array = np.zeros((dom_card, 1))
            for i in range(dom_card):
                cpd_array[i][0] = state_array[i]
            cpd = TabularCPD(variable=key,
                             variable_card=dom_card,
                             values=cpd_array)
            model.add_cpds(cpd)
        else:
            # add edges and form the conditional probability for internal nodes
            for p in parents:
                model.add_edge(p, key)
            mask_dom = mask_summation([masks[p] for p in parents])
            chan = state[mask_cod:mask_dom]
            cod = chan.cod
            dom = chan.dom
            if len(cod) > 1:
                raise Exception('Domains must have dimension 1')
            chan_array = chan.array
            print("* ", key, len(dom[0]), len(cod[0]), chan_array.shape)
            prod = reduce(operator.mul, [len(d) for d in dom], 1)
            cpd_array = np.zeros((len(cod[0]), prod))
            for i in range(len(dom[0])):
                cpd_array[i] = [
                    chan_array[i][j] for j in np.ndindex(*chan_array[i].shape)
                ]
            cpd = TabularCPD(variable=key,
                             variable_card=len(cod[0]),
                             values=cpd_array,
                             evidence=parents,
                             evidence_card=[len(d) for d in dom])
            model.add_cpds(cpd)
    if not model.check_model():
        raise Exception('Constructed model does not pass check')
    return model
Example #26
0
def bayesnet():
    """
    References:
        https://class.coursera.org/pgm-003/lecture/17
        http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
        http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
        http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
        https://github.com/pgmpy/pgmpy.git
        http://pgmpy.readthedocs.org/en/latest/
        http://nipy.bic.berkeley.edu:5000/download/11
    """
    # import operator as op
    # # Enumerate all possible events
    # varcard_list = list(map(op.attrgetter('variable_card'), cpd_list))
    # _esdat = list(ut.iprod(*map(range, varcard_list)))
    # _escol = list(map(op.attrgetter('variable'), cpd_list))
    # event_space = pd.DataFrame(_esdat, columns=_escol)

    # # Custom compression of event space to inspect a specific graph
    # def compress_space_flags(event_space, var1, var2, var3, cmp12_):
    #     """
    #     var1, var2, cmp_ = 'Lj', 'Lk', op.eq
    #     """
    #     import vtool as vt
    #     data = event_space
    #     other_cols = ut.setdiff_ordered(data.columns.tolist(), [var1, var2, var3])
    #     case_flags12 = cmp12_(data[var1], data[var2]).values
    #     # case_flags23 = cmp23_(data[var2], data[var3]).values
    #     # case_flags = np.logical_and(case_flags12, case_flags23)
    #     case_flags = case_flags12
    #     case_flags = case_flags.astype(np.int64)
    #     subspace = np.hstack((case_flags[:, None], data[other_cols].values))
    #     sel_ = vt.unique_row_indexes(subspace)
    #     flags = np.logical_and(mask, case_flags)
    #     return flags

    # # Build special cases
    # case_same   = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.eq)]
    # case_diff = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.ne)]
    # special_cases = [
    #     case_same,
    #     case_diff,
    # ]

    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    from pgmpy.inference import BeliefPropagation  # NOQA
    from pgmpy.inference import VariableElimination  # NOQA

    name_nice = ['n1', 'n2', 'n3']
    score_nice = ['low', 'high']
    match_nice = ['diff', 'same']
    num_names = len(name_nice)
    num_scores = len(score_nice)
    nid_basis = list(range(num_names))
    score_basis = list(range(num_scores))

    semtype2_nice = {
        'score': score_nice,
        'name': name_nice,
        'match': match_nice,
    }
    var2_cpd = {
    }
    globals()['semtype2_nice'] = semtype2_nice
    globals()['var2_cpd'] = var2_cpd

    name_combo = np.array(list(ut.iprod(nid_basis, nid_basis)))
    combo_is_same = name_combo.T[0] == name_combo.T[1]
    def get_expected_scores_prob(level1, level2):
        part1 = combo_is_same * level1
        part2 = (1 - combo_is_same) * (1 - (level2))
        expected_scores_level = part1 + part2
        return expected_scores_level

    # def make_cpd():

    def name_cpd(aid):
        from pgmpy.factors import TabularCPD
        cpd = TabularCPD(
            variable='N' + aid,
            variable_card=num_names,
            values=[[1.0 / num_names] * num_names])
        cpd.semtype = 'name'
        return cpd

    name_cpds = [name_cpd('i'), name_cpd('j'), name_cpd('k')]
    var2_cpd.update(dict(zip([cpd.variable for cpd in name_cpds], name_cpds)))
    if True:
        num_same_diff = 2
        samediff_measure = np.array([
            # get_expected_scores_prob(.12, .2),
            # get_expected_scores_prob(.88, .8),
            get_expected_scores_prob(0, 0),
            get_expected_scores_prob(1, 1),
        ])
        samediff_vals = (samediff_measure / samediff_measure.sum(axis=0)).tolist()
        def samediff_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='A' + aid1 + aid2,
                variable_card=num_same_diff,
                values=samediff_vals,
                evidence=['N' + aid1, 'N' + aid2],  # [::-1],
                evidence_card=[num_names, num_names])  # [::-1])
            cpd.semtype = 'match'
            return cpd
        samediff_cpds = [samediff_cpd('i', 'j'), samediff_cpd('j', 'k'), samediff_cpd('k', 'i')]
        var2_cpd.update(dict(zip([cpd.variable for cpd in samediff_cpds], samediff_cpds)))

        if True:
            def score_cpd(aid1, aid2):
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]
                evidence_cpds = [var2_cpd[key] for key in evidence]
                evidence_nice = [semtype2_nice[cpd.semtype] for cpd in evidence_cpds]
                evidence_card = list(map(len, evidence_nice))
                evidence_states = list(ut.iprod(*evidence_nice))
                variable_basis = semtype2_nice[semtype]

                variable_values = []
                for mystate in variable_basis:
                    row = []
                    for state in evidence_states:
                        if state[0] == state[1]:
                            if state[2] == 'same':
                                val = .2 if mystate == 'low' else .8
                            else:
                                val = 1
                                # val = .5 if mystate == 'low' else .5
                        elif state[0] != state[1]:
                            if state[2] == 'same':
                                val = .5 if mystate == 'low' else .5
                            else:
                                val = 1
                                # val = .9 if mystate == 'low' else .1
                        row.append(val)
                    variable_values.append(row)

                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,  # [::-1],
                    evidence_card=evidence_card)  # [::-1])
                cpd.semtype = semtype
                return cpd
        else:
            score_values = [
                [.8, .1],
                [.2, .9],
            ]
            def score_cpd(aid1, aid2):
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=num_scores,
                    values=score_values,
                    evidence=['A' + aid1 + aid2],  # [::-1],
                    evidence_card=[num_same_diff])  # [::-1])
                cpd.semtype = 'score'
                return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds + samediff_cpds
    else:
        score_measure = np.array([get_expected_scores_prob(level1, level2)
                                  for level1, level2 in
                                  zip(np.linspace(.1, .9, num_scores),
                                      np.linspace(.2, .8, num_scores))])

        score_values = (score_measure / score_measure.sum(axis=0)).tolist()

        def score_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='S' + aid1 + aid2,
                variable_card=num_scores,
                values=score_values,
                evidence=['N' + aid1, 'N' + aid2],
                evidence_card=[num_names, num_names])
            cpd.semtype = 'score'
            return cpd
        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds
        pass

    input_graph = []
    for cpd in cpd_list:
        if cpd.evidence is not None:
            for evar in cpd.evidence:
                input_graph.append((evar, cpd.variable))
    name_model = BayesianModel(input_graph)
    name_model.add_cpds(*cpd_list)

    var2_cpd.update(dict(zip([cpd.variable for cpd in cpd_list], cpd_list)))
    globals()['var2_cpd'] = var2_cpd

    varnames = [cpd.variable for cpd in cpd_list]

    # --- PRINT CPDS ---

    cpd = score_cpds[0]
    def print_cpd(cpd):
        print('CPT: %r' % (cpd,))
        index = semtype2_nice[cpd.semtype]
        if cpd.evidence is None:
            columns = ['None']
        else:
            basis_lists = [semtype2_nice[var2_cpd[ename].semtype] for ename in cpd.evidence]
            columns = [','.join(x) for x in ut.iprod(*basis_lists)]
        data = cpd.get_cpd()
        print(pd.DataFrame(data, index=index, columns=columns))

    for cpd in name_model.get_cpds():
        print('----')
        print(cpd._str('phi'))
        print_cpd(cpd)

    # --- INFERENCE ---

    Ni = name_cpds[0]

    event_space_combos = {}
    event_space_combos[Ni.variable] = 0  # Set ni to always be Fred
    for cpd in cpd_list:
        if cpd.semtype == 'score':
            event_space_combos[cpd.variable] = list(range(cpd.variable_card))
    evidence_dict = ut.all_dict_combinations(event_space_combos)

    # Query about name of annotation k given different event space params

    def pretty_evidence(evidence):
        return [key + '=' + str(semtype2_nice[var2_cpd[key].semtype][val])
                for key, val in evidence.items()]

    def print_factor(factor):
        row_cards = factor.cardinality
        row_vars = factor.variables
        values = factor.values.reshape(np.prod(row_cards), 1).flatten()
        # col_cards = 1
        # col_vars = ['']
        basis_lists = list(zip(*list(ut.iprod(*[range(c) for c in row_cards]))))
        nice_basis_lists = []
        for varname, basis in zip(row_vars, basis_lists):
            cpd = var2_cpd[varname]
            _nice_basis = ut.take(semtype2_nice[cpd.semtype], basis)
            nice_basis = ['%s=%s' % (varname, val) for val in _nice_basis]
            nice_basis_lists.append(nice_basis)
        row_lbls = [', '.join(sorted(x)) for x in zip(*nice_basis_lists)]
        print(ut.repr3(dict(zip(row_lbls, values)), precision=3, align=True, key_order_metric='-val'))

    # name_belief = BeliefPropagation(name_model)
    name_belief = VariableElimination(name_model)
    import pgmpy
    import six  # NOQA

    def try_query(evidence):
        print('--------')
        query_vars = ut.setdiff_ordered(varnames, list(evidence.keys()))
        evidence_str = ', '.join(pretty_evidence(evidence))
        probs = name_belief.query(query_vars, evidence)
        factor_list = probs.values()
        joint_factor = pgmpy.factors.factor_product(*factor_list)
        print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ')')
        # print(six.text_type(joint_factor))
        factor = joint_factor  # NOQA
        # print_factor(factor)
        # import utool as ut
        print(ut.hz_str([(f._str(phi_or_p='phi')) for f in factor_list]))

    for evidence in evidence_dict:
        try_query(evidence)

    evidence = {'Aij': 1, 'Ajk': 1, 'Aki': 1, 'Ni': 0}
    try_query(evidence)

    evidence = {'Aij': 0, 'Ajk': 0, 'Aki': 0, 'Ni': 0}
    try_query(evidence)

    globals()['score_nice'] = score_nice
    globals()['name_nice'] = name_nice
    globals()['score_basis'] = score_basis
    globals()['nid_basis'] = nid_basis

    print('Independencies')
    print(name_model.get_independencies())
    print(name_model.local_independencies([Ni.variable]))

    # name_belief = BeliefPropagation(name_model)
    # # name_belief = VariableElimination(name_model)
    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         probs = name_belief.query(['Lk'], evidence)
    #         factor = probs['Lk']
    #         probs = factor.values
    #         evidence_ = evidence.copy()
    #         evidence_['Li'] = name_nice[evidence['Li']]
    #         evidence_['Lj'] = name_nice[evidence['Lj']]
    #         evidence_['Sij'] = score_nice[evidence['Sij']]
    #         evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #         nice2_prob = ut.odict(zip(name_nice, probs.tolist()))
    #         ut.print_python_code('P(Lk | {evidence}) = {cpt}'.format(
    #             evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #             cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #         ))

    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.drop('Lj', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         query_vars = ['Lk', 'Lj']
    #         probs = name_belief.query(query_vars, evidence)
    #         for queryvar in query_vars:
    #             factor = probs[queryvar]
    #             print(factor._str('phi'))
    #             probs = factor.values
    #             evidence_ = evidence.copy()
    #             evidence_['Li'] = name_nice[evidence['Li']]
    #             evidence_['Sij'] = score_nice[evidence['Sij']]
    #             evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #             nice2_prob = ut.odict(zip([queryvar + '=' + x for x in name_nice], probs.tolist()))
    #             ut.print_python_code('P({queryvar} | {evidence}) = {cpt}'.format(
    #                 query_var=query_var,
    #                 evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #                 cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #             ))

    # _ draw model

    import plottool as pt
    import networkx as netx
    fig = pt.figure()  # NOQA
    fig.clf()
    ax = pt.gca()

    netx_nodes = [(node, {}) for node in name_model.nodes()]
    netx_edges = [(etup[0], etup[1], {}) for etup in name_model.edges()]
    netx_graph = netx.DiGraph()
    netx_graph.add_nodes_from(netx_nodes)
    netx_graph.add_edges_from(netx_edges)

    # pos = netx.graphviz_layout(netx_graph)
    pos = netx.pydot_layout(netx_graph, prog='dot')
    netx.draw(netx_graph, pos=pos, ax=ax, with_labels=True)

    pt.plt.savefig('foo.png')
    ut.startfile('foo.png')
Example #27
0
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

# Defining the network structure
model = BayesianModel([('C', 'H'), ('P', 'H')])

# Defining the CPDs:
cpd_c = TabularCPD('C', 3, [[0.33, 0.33, 0.33]])
cpd_p = TabularCPD('P', 3, [[0.33, 0.33, 0.33]])
cpd_h = TabularCPD(
    'H',
    3, [[0, 0, 0, 0, 0.5, 1, 0, 1, 0.5], [0.5, 0, 1, 0, 0, 0, 1, 0, 0.5],
        [0.5, 1, 0, 1, 0.5, 0, 0, 0, 0]],
    evidence=['C', 'P'],
    evidence_card=[3, 3])

# Associating the CPDs with the network structure.
model.add_cpds(cpd_c, cpd_p, cpd_h)

# Some other methods
print(model.get_cpds())
print(model.check_model())
# Inferring the posterior probability
from pgmpy.inference import VariableElimination

infer = VariableElimination(model)
posterior_p = infer.query(['P'], evidence={'C': 0, 'H': 2})
print(posterior_p['P'])
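
# Added sanity check (a sketch, with numpy imported here for the comparison):
# with the contestant on door 0 and the host opening door 2, the Monty Hall
# posterior should put zero mass on the opened door and twice as much mass on
# door 1 as on door 0, i.e. roughly [1/3, 2/3, 0].
import numpy as np

assert np.allclose(posterior_p['P'].values, [1.0 / 3, 2.0 / 3, 0.0], atol=1e-2)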
Example #28
0
def get_game_network():
    """Create a Bayes Net representation of the game problem.
    Name the nodes as "A","B","C","AvB","BvC" and "CvA".  """

    # Add nodes
    BayesNet = BayesianModel()
    BayesNet.add_node("A")
    BayesNet.add_node("B")
    BayesNet.add_node("C")
    BayesNet.add_node("AvB")
    BayesNet.add_node("BvC")
    BayesNet.add_node("CvA")

    # Add edges
    BayesNet.add_edge("A", "AvB")
    BayesNet.add_edge("A", "CvA")
    BayesNet.add_edge("B", "AvB")
    BayesNet.add_edge("B", "BvC")
    BayesNet.add_edge("C", "BvC")
    BayesNet.add_edge("C", "CvA")

    # Add probabilities
    cpd_A = TabularCPD("A", 4, values=[[0.15], [0.45], [0.30], [0.10]])
    cpd_B = TabularCPD("B", 4, values=[[0.15], [0.45], [0.30], [0.10]])
    cpd_C = TabularCPD("C", 4, values=[[0.15], [0.45], [0.30], [0.10]])
    cpd_avb = TabularCPD("AvB",
                         3,
                         values=[[
                             0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15, 0.75,
                             0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1
                         ],
                                 [
                                     0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75,
                                     0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1
                                 ],
                                 [
                                     0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1,
                                     0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8
                                 ]],
                         evidence=["A", "B"],
                         evidence_card=[4, 4])
    cpd_bvc = TabularCPD("BvC",
                         3,
                         values=[[
                             0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15, 0.75,
                             0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1
                         ],
                                 [
                                     0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75,
                                     0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1
                                 ],
                                 [
                                     0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1,
                                     0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8
                                 ]],
                         evidence=["B", "C"],
                         evidence_card=[4, 4])
    cpd_avc = TabularCPD("CvA",
                         3,
                         values=[[
                             0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15, 0.75,
                             0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1
                         ],
                                 [
                                     0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75,
                                     0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1
                                 ],
                                 [
                                     0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1,
                                     0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8
                                 ]],
                         evidence=["C", "A"],
                         evidence_card=[4, 4])
    BayesNet.add_cpds(cpd_A, cpd_B, cpd_C, cpd_avb, cpd_bvc, cpd_avc)

    return BayesNet
Example #29
0
class TestBayesianModelMethods(unittest.TestCase):

    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3, values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'], evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancenstors1 = self.G2._get_ancestors_of('g')
        ancenstors2 = self.G2._get_ancestors_of('d')
        ancenstors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancenstors1, {'d', 'i', 'g'})
        self.assertEqual(ancenstors2, {'d'})
        self.assertEqual(ancenstors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'), Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
Example #30
0
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
model.fit(values)
predict_data = predict_data.copy()
predict_data.drop('E', axis=1, inplace=True)
#print predict_data
y_pred = model.predict(predict_data)
y_prob = model.predict_probability(predict_data)


from pgmpy.sampling import BayesianModelSampling
model = BayesianModel([('D', 'G'), ('I', 'G')])
cpd_d = TabularCPD('D', 2, [[0.6], [0.4]])
cpd_i = TabularCPD('I', 2, [[0.7], [0.3]])
cpd_g = TabularCPD('G', 3, 
                   [[0.3, 0.05, 0.9, 0.5], 
                    [0.4, 0.25, 0.08, 0.3], 
                    [0.3, 0.7, 0.02, 0.2]],
                   ['D', 'I'], [2, 2])
model.add_cpds(cpd_d, cpd_i, cpd_g)
 
infer = BayesianModelSampling(model)
data = infer.forward_sample(500)
#print data
 
model.fit(data, estimator=MaximumLikelihoodEstimator)
for cpd in model.get_cpds():
    print("CPD of {variable}:".format(variable=cpd.variable))
    print(cpd)
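
# A possible variation (added sketch; assumes the same imports as the snippet
# above): instead of maximum-likelihood estimation, pgmpy's BayesianEstimator
# can fit the same sampled data with a BDeu prior.
from pgmpy.estimators import BayesianEstimator

bayes_model = BayesianModel([('D', 'G'), ('I', 'G')])
bayes_model.fit(data, estimator=BayesianEstimator, prior_type='BDeu',
                equivalent_sample_size=10)
for cpd in bayes_model.get_cpds():
    print(cpd)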


Example #31
0
class TestVariableElimination(unittest.TestCase):
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All the values that are used for comparison in all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.bayesian_inference.query(['J'])
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

        query_result = self.bayesian_inference.query(['Q', 'J'])
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(), 0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G']), 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R']),
                                    0.4055, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R', 'A']),
                                    0.3260, decimal=4)

    def test_map_query(self):
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0,
                                         'L': 0})

    def test_map_query_with_evidence(self):
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'],
                                                      {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        induced_graph = self.bayesian_inference.induced_graph(['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        result_width = self.bayesian_inference.induced_width(['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
Example #32
0
class TestUAIWriter(unittest.TestCase):
    def setUp(self):
        self.maxDiff = None
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['family-out', 'bowel-problem'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}

        self.bayesmodel = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(edges)
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                    '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
        factors = []
        for table in tables:
            variables = table[0]
            cardinality = [int(domain[var]) for var in variables]
            values = list(map(float, table[1]))
            factor = Factor(variables, cardinality, values)
            factors.append(factor)
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)

    def test_bayes_model(self):
        self.expected_bayes_file = """BAYES
5
2 2 2 2 2
5
1 0
3 2 0 1
1 2
2 1 3
2 2 4

2
0.01 0.99
8
0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7
2
0.15 0.85
4
0.7 0.3 0.01 0.99
4
0.6 0.4 0.05 0.95"""
        self.assertEqual(str(self.bayeswriter.__str__()), str(self.expected_bayes_file))

    def test_markov_model(self):
        self.expected_markov_file = """MARKOV
3
2 2 3
2
2 0 1
3 0 1 2

4
4.0 2.4 1.0 0.0
12
2.25 3.25 3.75 0.0 0.0 10.0 1.875 4.0 3.333 2.0 2.0 3.4"""
        self.assertEqual(str(self.markovwriter.__str__()), str(self.expected_markov_file))
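
    # A possible round-trip check (a sketch: it assumes UAIWriter.write_uai and
    # UAIReader from pgmpy.readwrite behave as in recent pgmpy releases; the
    # file name is arbitrary). The UAI format stores no variable names, so only
    # the variable count is compared after reading the file back.
    def test_bayes_round_trip(self):
        from pgmpy.readwrite import UAIReader
        self.bayeswriter.write_uai('dog_problem.uai')
        round_trip = UAIReader(path='dog_problem.uai').get_model()
        self.assertEqual(len(round_trip.nodes()), len(self.bayesmodel.nodes()))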
                       'A': ['1', '0'],
                       'D': ['1', '0']
                   })
# Boss capabilities
cpd_B = TabularCPD(variable='B',
                   variable_card=2,
                   values=[[0.01, 0.45, 0.28, 0.95], [0.99, 0.55, 0.72, 0.05]],
                   evidence=['L', 'M'],
                   evidence_card=[2, 2],
                   state_names={
                       'B': ['1', '0'],
                       'L': ['1', '0'],
                       'M': ['1', '0']
                   })

G.add_cpds(cpd_A, cpd_D, cpd_M, cpd_L, cpd_B)

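# With state_names declared on the CPDs, the evidence values in the queries
# below are state labels ('0'/'1' strings) rather than integer indices.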
infer = VariableElimination(G)
l_dist = infer.query(['L'])
b_dist = infer.query(['B'])
ex1_dist = infer.query(['B'], evidence={'L': '0', 'M': '0'})
ex2_dist = infer.query(['B'], evidence={'L': '0', 'M': '1'})
ex3_dist = infer.query(['B'], evidence={'L': '1', 'M': '0'})
ex4_dist = infer.query(['B'], evidence={'L': '1', 'M': '1'})

print(l_dist)
print()
print(b_dist)
print()
print(ex1_dist)
print()
Example #34
0
lifeExp_cpd = TabularCPD(
    variable='L', variable_card=2,
    values=[[0.7, 0.25, 0.1], [0.3, 0.75, 0.9]],
    evidence=['I'], evidence_card=[3])

femaleSchool_cpd = TabularCPD(
    variable='F', variable_card=2,
    values=[[0.8, 0.3, 0.2], [0.2, 0.7, 0.8]],
    evidence=['I'], evidence_card=[3])

govtCorr_cpd = TabularCPD(
    variable='G', variable_card=2,
    values=[[0.05, 0.4, 0.55, 0.85], [0.95, 0.6, 0.45, 0.15]],
    evidence=['F', 'L'], evidence_card=[2, 2])

model.add_cpds(income_cpd, lifeExp_cpd,
               femaleSchool_cpd, govtCorr_cpd)

inference = VariableElimination(model)
prob_G = inference.query(variables=['G'], evidence={'L': 1})
print(prob_G['G'])
# +-----+----------+
# | G   |   phi(G) |
# |-----+----------|
# | G_0 |   0.7292 |
# | G_1 |   0.2708 |
# +-----+----------+
inference = VariableElimination(model)
prob_G = inference.query(variables=['G'], evidence={'F': 1})
print(prob_G['G'])
# +-----+----------+
# | G   |   phi(G) |
Example #35
0
                    values=[[0.01], [0.99]])
cpd_do = TabularCPD(variable='dog_out',
                    variable_card=2,
                    values=[[0.99, 0.9, 0.97, 0.3], [0.01, 0.1, 0.03, 0.7]],
                    evidence=['family_out', 'bowel_problem'],
                    evidence_card=[2, 2])
cpd_lo = TabularCPD(variable='light_on',
                    variable_card=2,
                    values=[[0.6, 0.05], [0.4, 0.95]],
                    evidence=['family_out'],
                    evidence_card=[2])
cpd_hb = TabularCPD(variable='hear_bark',
                    variable_card=2,
                    values=[[0.7, 0.01], [0.3, 0.99]],
                    evidence=['dog_out'],
                    evidence_card=[2])

# integrity checking
model.add_cpds(cpd_fo, cpd_bp, cpd_do, cpd_lo, cpd_hb)
model.check_model()

junction_tree = model.to_junction_tree()
print(junction_tree.nodes())

infer_bp = BeliefPropagation(junction_tree)
print(
    infer_bp.query(['family_out'], evidence={
        'light_on': 0,
        'hear_bark': 1
    })['family_out'])
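
# Cross-check (a sketch): VariableElimination on the same model should give the
# same posterior as the junction-tree belief propagation query above.
# VariableElimination lives in pgmpy.inference; `model` is the network built
# earlier in this example.
from pgmpy.inference import VariableElimination

infer_ve = VariableElimination(model)
print(
    infer_ve.query(['family_out'], evidence={
        'light_on': 0,
        'hear_bark': 1
    })['family_out'])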
Example #36
0
class TestBeliefPropagation(unittest.TestCase):
    def setUp(self):
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)

        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

    def test_calibrate_clique_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()

        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))

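        # Expected calibrated beliefs: each clique potential multiplied by the
        # sum-product messages arriving from its neighbours along the chain
        # (A, B) -- (B, C) -- (C, D).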
        b_A_B = phi1 * (phi3.marginalize(['D'], inplace=False) * phi2).marginalize(['C'], inplace=False)
        b_B_C = phi2 * (phi1.marginalize(['A'], inplace=False) * phi3.marginalize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.marginalize(['A'], inplace=False) * phi2).marginalize(['B'], inplace=False)

        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values, b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values, b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values, b_C_D.values)

    def test_calibrate_sepset_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()

        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))

        b_B = (phi1 * (phi3.marginalize(['D'], inplace=False) *
                       phi2).marginalize(['C'], inplace=False)).marginalize(['A'], inplace=False)

        b_C = (phi2 * (phi1.marginalize(['A'], inplace=False) *
                       phi3.marginalize(['D'], inplace=False))).marginalize(['B'], inplace=False)

        np_test.assert_array_almost_equal(sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values, b_B.values)
        np_test.assert_array_almost_equal(sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values, b_C.values)

    def test_max_calibrate_clique_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()

        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))

        b_A_B = phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(['C'], inplace=False)
        b_B_C = phi2 * (phi1.maximize(['A'], inplace=False) * phi3.maximize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.maximize(['A'], inplace=False) * phi2).maximize(['B'], inplace=False)

        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values, b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values, b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values, b_C_D.values)

    def test_max_calibrate_sepset_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()

        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))

        b_B = (phi1 * (phi3.maximize(['D'], inplace=False) *
                       phi2).maximize(['C'], inplace=False)).maximize(['A'], inplace=False)

        b_C = (phi2 * (phi1.maximize(['A'], inplace=False) *
                       phi3.maximize(['D'], inplace=False))).maximize(['B'], inplace=False)

        np_test.assert_array_almost_equal(sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values, b_B.values)
        np_test.assert_array_almost_equal(sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values, b_C.values)

    # All the values that are used for comparison in all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J'],
                                                evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J', 'Q'],
                                                evidence={'A': 0, 'R': 0,
                                                          'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_map_query(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query()
        self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0,
                                         'L': 0})

    def test_map_query_with_evidence(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query(['A', 'R', 'L'],
                                                 {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def tearDown(self):
        del self.junction_tree
        del self.bayesian_model
Example #37
0
def create_models(hidden_suspects, hidden_weapons, hidden_rooms):
    """
    Creates Bayesian Networks for the BN Player.
    :param hidden_suspects: Number of hidden suspect cards, which is the domain size of the variables in
    the suspects BN.
    :param hidden_weapons: Number of hidden weapon cards, which is the domain size of the variables in
    the weapons BN.
    :param hidden_rooms:  Number of hidden room cards, which is the domain size of the variables in
    the rooms BN.
    :return: a tuple (suspects model, weapons model, rooms model)
    """

    # Suspects model:
    suspects_model = BayesianModel([('s', 's1_p2'), ('s', 's2_p2'),
                                    ('s1_p2', 's2_p2'), ('s', 's1_p3'),
                                    ('s1_p2', 's1_p3'), ('s2_p2', 's1_p3')])

    s_cpd = TabularCPD(variable='s',
                       variable_card=hidden_suspects,
                       values=create_cpd_table(0, hidden_suspects))
    s1_p2_cpd = TabularCPD(variable='s1_p2',
                           variable_card=hidden_suspects,
                           values=create_cpd_table(1, hidden_suspects),
                           evidence=['s'],
                           evidence_card=[hidden_suspects])
    s2_p2_cpd = TabularCPD(variable='s2_p2',
                           variable_card=hidden_suspects,
                           values=create_cpd_table(2, hidden_suspects),
                           evidence=['s', 's1_p2'],
                           evidence_card=[hidden_suspects, hidden_suspects])
    s1_p3_cpd = TabularCPD(
        variable='s1_p3',
        variable_card=hidden_suspects,
        values=create_cpd_table(3, hidden_suspects),
        evidence=['s', 's1_p2', 's2_p2'],
        evidence_card=[hidden_suspects, hidden_suspects, hidden_suspects])
    suspects_model.add_cpds(s_cpd, s1_p2_cpd, s2_p2_cpd, s1_p3_cpd)

    # Weapons model:
    weapons_model = BayesianModel([('w', 'w1_p2'), ('w', 'w2_p2'),
                                   ('w', 'w1_p3'), ('w', 'w2_p3'),
                                   ('w1_p2', 'w2_p2'), ('w1_p2', 'w1_p3'),
                                   ('w1_p2', 'w2_p3'), ('w2_p2', 'w1_p3'),
                                   ('w2_p2', 'w2_p3'), ('w1_p3', 'w2_p3')])

    w_cpd = TabularCPD(variable='w',
                       variable_card=hidden_weapons,
                       values=create_cpd_table(0, hidden_weapons))
    w1_p2_cpd = TabularCPD(variable='w1_p2',
                           variable_card=hidden_weapons,
                           values=create_cpd_table(1, hidden_weapons),
                           evidence=['w'],
                           evidence_card=[hidden_weapons])
    w2_p2_cpd = TabularCPD(variable='w2_p2',
                           variable_card=hidden_weapons,
                           values=create_cpd_table(2, hidden_weapons),
                           evidence=['w', 'w1_p2'],
                           evidence_card=[hidden_weapons, hidden_weapons])
    w1_p3_cpd = TabularCPD(
        variable='w1_p3',
        variable_card=hidden_weapons,
        values=create_cpd_table(3, hidden_weapons),
        evidence=['w', 'w1_p2', 'w2_p2'],
        evidence_card=[hidden_weapons, hidden_weapons, hidden_weapons])
    w2_p3_cpd = TabularCPD(variable='w2_p3',
                           variable_card=hidden_weapons,
                           values=create_cpd_table(4, hidden_weapons),
                           evidence=['w', 'w1_p2', 'w2_p2', 'w1_p3'],
                           evidence_card=[
                               hidden_weapons, hidden_weapons, hidden_weapons,
                               hidden_weapons
                           ])
    weapons_model.add_cpds(w_cpd, w1_p2_cpd, w2_p2_cpd, w1_p3_cpd, w2_p3_cpd)

    # Rooms model:
    rooms_model = BayesianModel([('r', 'r1_p2'), ('r', 'r2_p2'),
                                 ('r', 'r1_p3'), ('r', 'r2_p3'),
                                 ('r1_p2', 'r2_p2'), ('r1_p2', 'r1_p3'),
                                 ('r1_p2', 'r2_p3'), ('r2_p2', 'r1_p3'),
                                 ('r2_p2', 'r2_p3'), ('r1_p3', 'r2_p3')])
    r_cpd = TabularCPD(variable='r',
                       variable_card=hidden_rooms,
                       values=create_cpd_table(0, hidden_rooms))
    r1_p2_cpd = TabularCPD(variable='r1_p2',
                           variable_card=hidden_rooms,
                           values=create_cpd_table(1, hidden_rooms),
                           evidence=['r'],
                           evidence_card=[hidden_rooms])
    r2_p2_cpd = TabularCPD(variable='r2_p2',
                           variable_card=hidden_rooms,
                           values=create_cpd_table(2, hidden_rooms),
                           evidence=['r', 'r1_p2'],
                           evidence_card=[hidden_rooms, hidden_rooms])
    r1_p3_cpd = TabularCPD(
        variable='r1_p3',
        variable_card=hidden_rooms,
        values=create_cpd_table(3, hidden_rooms),
        evidence=['r', 'r1_p2', 'r2_p2'],
        evidence_card=[hidden_rooms, hidden_rooms, hidden_rooms])
    r2_p3_cpd = TabularCPD(
        variable='r2_p3',
        variable_card=hidden_rooms,
        values=create_cpd_table(4, hidden_rooms),
        evidence=['r', 'r1_p2', 'r2_p2', 'r1_p3'],
        evidence_card=[hidden_rooms, hidden_rooms, hidden_rooms, hidden_rooms])
    rooms_model.add_cpds(r_cpd, r1_p2_cpd, r2_p2_cpd, r1_p3_cpd, r2_p3_cpd)
    return suspects_model, weapons_model, rooms_model
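
# Note: create_cpd_table used above is defined elsewhere in the source project
# and is not shown here. A purely illustrative stand-in with the expected shape
# (one row per card value, one column per joint assignment of the parent
# variables) would be a uniform table:
import numpy as np

def _uniform_cpd_table(num_parents, card):
    # Every column sums to 1, so TabularCPD accepts it as a valid CPD.
    return np.full((card, card ** num_parents), 1.0 / card)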
Example #38
0
class TestInferenceBase(unittest.TestCase):
    def setUp(self):
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'),
                                       ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        b_cpd = TabularCPD('b',
                           2, [[0.2, 0.4], [0.8, 0.6]],
                           evidence=['a'],
                           evidence_card=[2])
        c_cpd = TabularCPD('c',
                           2, [[0.1, 0.2], [0.9, 0.8]],
                           evidence=['b'],
                           evidence_card=[2])
        d_cpd = TabularCPD('d',
                           2, [[0.4, 0.3], [0.6, 0.7]],
                           evidence=['c'],
                           evidence_card=[2])
        e_cpd = TabularCPD('e',
                           2, [[0.3, 0.2], [0.7, 0.8]],
                           evidence=['d'],
                           evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'),
                                   ('c', 'd')])
        factor_1 = DiscreteFactor(['a', 'b'], [2, 2],
                                  np.array([100, 1, 1, 100]))
        factor_2 = DiscreteFactor(['a', 'c'], [2, 2],
                                  np.array([40, 30, 100, 20]))
        factor_3 = DiscreteFactor(['b', 'd'], [2, 2],
                                  np.array([1, 100, 100, 1]))
        factor_4 = DiscreteFactor(['c', 'd'], [2, 2],
                                  np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)

    def test_bayesian_inference_init(self):
        infer_bayesian = Inference(self.bayesian)
        self.assertEqual(set(infer_bayesian.variables),
                         {'a', 'b', 'c', 'd', 'e'})
        self.assertEqual(infer_bayesian.cardinality, {
            'a': 2,
            'b': 2,
            'c': 2,
            'd': 2,
            'e': 2
        })
        self.assertIsInstance(infer_bayesian.factors, defaultdict)
        self.assertEqual(
            set(infer_bayesian.factors['a']),
            set([
                self.bayesian.get_cpds('a').to_factor(),
                self.bayesian.get_cpds('b').to_factor()
            ]))
        self.assertEqual(
            set(infer_bayesian.factors['b']),
            set([
                self.bayesian.get_cpds('b').to_factor(),
                self.bayesian.get_cpds('c').to_factor()
            ]))
        self.assertEqual(
            set(infer_bayesian.factors['c']),
            set([
                self.bayesian.get_cpds('c').to_factor(),
                self.bayesian.get_cpds('d').to_factor()
            ]))
        self.assertEqual(
            set(infer_bayesian.factors['d']),
            set([
                self.bayesian.get_cpds('d').to_factor(),
                self.bayesian.get_cpds('e').to_factor()
            ]))
        self.assertEqual(set(infer_bayesian.factors['e']),
                         set([self.bayesian.get_cpds('e').to_factor()]))

    def test_markov_inference_init(self):
        infer_markov = Inference(self.markov)
        self.assertEqual(set(infer_markov.variables), {'a', 'b', 'c', 'd'})
        self.assertEqual(infer_markov.cardinality, {
            'a': 2,
            'b': 2,
            'c': 2,
            'd': 2
        })
        self.assertEqual(
            infer_markov.factors, {
                'a': [
                    DiscreteFactor(['a', 'b'], [2, 2],
                                   np.array([100, 1, 1, 100])),
                    DiscreteFactor(['a', 'c'], [2, 2],
                                   np.array([40, 30, 100, 20]))
                ],
                'b': [
                    DiscreteFactor(['a', 'b'], [2, 2],
                                   np.array([100, 1, 1, 100])),
                    DiscreteFactor(['b', 'd'], [2, 2],
                                   np.array([1, 100, 100, 1]))
                ],
                'c': [
                    DiscreteFactor(['a', 'c'], [2, 2],
                                   np.array([40, 30, 100, 20])),
                    DiscreteFactor(['c', 'd'], [2, 2],
                                   np.array([60, 60, 40, 40]))
                ],
                'd': [
                    DiscreteFactor(['b', 'd'], [2, 2],
                                   np.array([1, 100, 100, 1])),
                    DiscreteFactor(['c', 'd'], [2, 2],
                                   np.array([60, 60, 40, 40]))
                ]
            })
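
# Sketch of the per-variable factor index that the two tests above verify
# (illustrative only, not pgmpy's internal implementation): every variable is
# mapped to each factor whose scope contains it.
from collections import defaultdict

def factor_index(model):
    if hasattr(model, 'get_cpds'):   # BayesianModel: convert CPDs to factors
        factors = [cpd.to_factor() for cpd in model.get_cpds()]
    else:                            # MarkovModel: use its factors directly
        factors = model.get_factors()
    index = defaultdict(list)
    for factor in factors:
        for var in factor.scope():
            index[var].append(factor)
    return index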
Example #39
0
class TestVariableElimination(unittest.TestCase):
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD('J',
                           2,
                           values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'],
                           evidence_card=[2, 2])
        cpd_q = TabularCPD('Q',
                           2,
                           values=[[0.9, 0.2], [0.1, 0.8]],
                           evidence=['J'],
                           evidence_card=[2])
        cpd_l = TabularCPD('L',
                           2,
                           values=[[0.9, 0.45, 0.8, 0.1],
                                   [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'],
                           evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All the values that are used for comparison in all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.bayesian_inference.query(['J'])
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

        query_result = self.bayesian_inference.query(['Q', 'J'])
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(),
                                    0.1659,
                                    decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G']),
                                    0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R']),
                                    0.4055, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R', 'A']),
                                    0.3260, decimal=4)

    def test_map_query(self):
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, {
            'A': 1,
            'R': 1,
            'J': 1,
            'Q': 1,
            'G': 0,
            'L': 0
        })

    def test_map_query_with_evidence(self):
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'], {
            'J': 0,
            'Q': 1,
            'G': 0
        })
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        induced_graph = self.bayesian_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        result_width = self.bayesian_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
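
# Hand check of the first marginal asserted above (test_query_single_variable),
# using plain Python and assuming pgmpy's TabularCPD column order in which the
# last evidence variable varies fastest:
p_a, p_r = [0.2, 0.8], [0.4, 0.6]
p_j0_given_ar = [[0.9, 0.6], [0.7, 0.1]]  # rows: A state, columns: R state
p_j0 = sum(p_a[a] * p_r[r] * p_j0_given_ar[a][r]
           for a in range(2) for r in range(2))
print(round(p_j0, 3), round(1 - p_j0, 3))  # 0.416 0.584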
Example #40
0
class TestBayesianModelCPD(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

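    # Student-style network: 'd' -> 'g' <- 'i' makes 'g' a collider, with
    # 'g' -> 'l' and 'i' -> 's'; the active-trail assertions below follow from
    # d-separation in this graph.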
    def test_active_trail_nodes(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2), evidence=['A'],
                            evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        from pgmpy.factors import TabularCPD
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        from pgmpy.factors import TabularCPD
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)

        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def tearDown(self):
        del self.G
Example #41
0
class TestBeliefPropagation(unittest.TestCase):
    def setUp(self):
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)

        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD('J',
                           2,
                           values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'],
                           evidence_card=[2, 2])
        cpd_q = TabularCPD('Q',
                           2,
                           values=[[0.9, 0.2], [0.1, 0.8]],
                           evidence=['J'],
                           evidence_card=[2])
        cpd_l = TabularCPD('L',
                           2,
                           values=[[0.9, 0.45, 0.8, 0.1],
                                   [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'],
                           evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

    def test_calibrate_clique_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()

        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        b_A_B = phi1 * (phi3.marginalize(['D'], inplace=False) *
                        phi2).marginalize(['C'], inplace=False)
        b_B_C = phi2 * (phi1.marginalize(['A'], inplace=False) *
                        phi3.marginalize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.marginalize(['A'], inplace=False) *
                        phi2).marginalize(['B'], inplace=False)

        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values,
                                          b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values,
                                          b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values,
                                          b_C_D.values)

    def test_calibrate_sepset_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()

        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        b_B = (phi1 *
               (phi3.marginalize(['D'], inplace=False) * phi2).marginalize(
                   ['C'], inplace=False)).marginalize(['A'], inplace=False)

        b_C = (phi2 * (phi1.marginalize(['A'], inplace=False) *
                       phi3.marginalize(['D'], inplace=False))).marginalize(
                           ['B'], inplace=False)

        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values,
            b_B.values)
        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values,
            b_C.values)

    def test_max_calibrate_clique_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        clique_belief = belief_propagation.get_clique_beliefs()

        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        b_A_B = phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(
            ['C'], inplace=False)
        b_B_C = phi2 * (phi1.maximize(['A'], inplace=False) *
                        phi3.maximize(['D'], inplace=False))
        b_C_D = phi3 * (phi1.maximize(['A'], inplace=False) * phi2).maximize(
            ['B'], inplace=False)

        np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values,
                                          b_A_B.values)
        np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values,
                                          b_B_C.values)
        np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values,
                                          b_C_D.values)

    def test_max_calibrate_sepset_belief(self):
        belief_propagation = BeliefPropagation(self.junction_tree)
        belief_propagation.max_calibrate()
        sepset_belief = belief_propagation.get_sepset_beliefs()

        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        b_B = (phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(
            ['C'], inplace=False)).maximize(['A'], inplace=False)

        b_C = (phi2 * (phi1.maximize(['A'], inplace=False) *
                       phi3.maximize(['D'], inplace=False))).maximize(
                           ['B'], inplace=False)

        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values,
            b_B.values)
        np_test.assert_array_almost_equal(
            sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values,
            b_C.values)

    # All the values that are used for comparison in all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J'],
                                                evidence={
                                                    'A': 0,
                                                    'R': 1
                                                })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        query_result = belief_propagation.query(variables=['J', 'Q'],
                                                evidence={
                                                    'A': 0,
                                                    'R': 0,
                                                    'G': 0,
                                                    'L': 1
                                                })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_map_query(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query()
        self.assertDictEqual(map_query, {
            'A': 1,
            'R': 1,
            'J': 1,
            'Q': 1,
            'G': 0,
            'L': 0
        })

    def test_map_query_with_evidence(self):
        belief_propagation = BeliefPropagation(self.bayesian_model)
        map_query = belief_propagation.map_query(['A', 'R', 'L'], {
            'J': 0,
            'Q': 1,
            'G': 0
        })
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def tearDown(self):
        del self.junction_tree
        del self.bayesian_model
Example #42
0
class TestDirectedGraphCPDOperations(unittest.TestCase):
    def setUp(self):
        self.graph = BayesianModel()

    def test_add_single_cpd(self):
        cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                         ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd)
        self.assertListEqual(self.graph.get_cpds(), [cpd])

    def test_add_multiple_cpds(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd1, cpd2, cpd3])

    def test_remove_single_cpd(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1)
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_remove_single_cpd_string(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff')
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds_string(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff', 'grade')
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_get_cpd_for_node(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertEqual(self.graph.get_cpds('diff'), cpd1)
        self.assertEqual(self.graph.get_cpds('intel'), cpd2)
        self.assertEqual(self.graph.get_cpds('grade'), cpd3)

    def test_get_cpd_raises_error(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertRaises(ValueError, self.graph.get_cpds, 'sat')

    def tearDown(self):
        del self.graph
Example #43
0
class TestBayesianModelMethods(unittest.TestCase):

    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3, values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'], evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'), Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
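        # Spot check of the factorization encoded by G1: val[0] = 0.01 equals
        # P(diff=0) * P(intel=0) * P(grade=0 | diff=0, intel=0)
        # = 0.2 * 0.5 * 0.1, consistent with G1 being an I-map of this JPD.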
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
class TestUAIWriter(unittest.TestCase):
    def setUp(self):
        self.maxDiff = None
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['bowel-problem', 'family-out'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}

        self.bayesmodel = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(edges)
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                    '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
        factors = []
        for table in tables:
            variables = table[0]
            cardinality = [int(domain[var]) for var in variables]
            values = list(map(float, table[1]))
            factor = DiscreteFactor(variables, cardinality, values)
            factors.append(factor)
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)

    def test_bayes_model(self):
        self.expected_bayes_file = """BAYES
5
2 2 2 2 2
5
1 0
3 2 0 1
1 2
2 1 3
2 2 4

2
0.01 0.99
8
0.99 0.01 0.97 0.03 0.9 0.1 0.3 0.7
2
0.15 0.85
4
0.7 0.3 0.01 0.99
4
0.6 0.4 0.05 0.95"""
        self.assertEqual(str(self.bayeswriter.__str__()), str(self.expected_bayes_file))

    def test_markov_model(self):
        self.expected_markov_file = """MARKOV
3
2 2 3
2
2 0 1
3 0 1 2

4
4.0 2.4 1.0 0.0
12
2.25 3.25 3.75 0.0 0.0 10.0 1.875 4.0 3.333 2.0 2.0 3.4"""
        self.assertEqual(str(self.markovwriter.__str__()), str(self.expected_markov_file))
class TestBIFWriter(unittest.TestCase):

    def setUp(self):
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]

        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}

        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}

        parents = {'bowel-problem': [],
                   'dog-out': ['family-out', 'bowel-problem'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}

        properties = {'bowel-problem': ['position = (335, 99)'],
                      'dog-out': ['position = (300, 195)'],
                      'family-out': ['position = (257, 99)'],
                      'hear-bark': ['position = (296, 268)'],
                      'light-on': ['position = (218, 195)']}

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var in sorted(cpds.keys()):
            values = cpds[var]
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        for node, node_properties in properties.items():
            for prop in node_properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                self.model.node[node][prop_name] = prop_value

        self.writer = BIFWriter(model=self.model)

    def test_str(self):
        self.expected_string = """network unknown {
}
variable bowel-problem {
    type discrete [ 2 ] { bowel-problem_0, bowel-problem_1 };
    property position = (335, 99) ;
}
variable dog-out {
    type discrete [ 2 ] { dog-out_0, dog-out_1 };
    property position = (300, 195) ;
}
variable family-out {
    type discrete [ 2 ] { family-out_0, family-out_1 };
    property position = (257, 99) ;
}
variable hear-bark {
    type discrete [ 2 ] { hear-bark_0, hear-bark_1 };
    property position = (296, 268) ;
}
variable light-on {
    type discrete [ 2 ] { light-on_0, light-on_1 };
    property position = (218, 195) ;
}
probability ( bowel-problem ) {
    table 0.01, 0.99 ;
}
probability ( dog-out | bowel-problem, family-out ) {
    table 0.99, 0.01, 0.97, 0.03, 0.9, 0.1, 0.3, 0.7 ;
}
probability ( family-out ) {
    table 0.15, 0.85 ;
}
probability ( hear-bark | dog-out ) {
    table 0.7, 0.3, 0.01, 0.99 ;
}
probability ( light-on | family-out ) {
    table 0.6, 0.4, 0.05, 0.95 ;
}
"""
        self.maxDiff = None
        self.assertEqual(self.writer.__str__(), self.expected_string)
Example #46
0
class TestBayesianModelSampling(unittest.TestCase):
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({State('A', 0), State('A', 1)}))
        self.assertTrue(set(sample.J).issubset({State('J', 0), State('J', 1)}))
        self.assertTrue(set(sample.R).issubset({State('R', 0), State('R', 1)}))
        self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
        self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
        self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))

    def test_rejection_sample_basic(self):
        sample = self.sampling_inference.rejection_sample([State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({State('A', 1)}))
        self.assertTrue(set(sample.J).issubset({State('J', 1)}))
        self.assertTrue(set(sample.R).issubset({State('R', 1)}))
        self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
        self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
        self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))

    def test_likelihood_weighted_sample(self):
        sample = self.sampling_inference.likelihood_weighted_sample([State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 7)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertIn('_weight', sample.columns)
        self.assertTrue(set(sample.A).issubset({State('A', 0), State('A', 1)}))
        self.assertTrue(set(sample.J).issubset({State('J', 0), State('J', 1)}))
        self.assertTrue(set(sample.R).issubset({State('R', 0), State('R', 1)}))
        self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
        self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
        self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
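The sampling routines exercised by this test can also be used on their own. The following is a minimal standalone sketch, not part of the original test suite; the import paths for TabularCPD, State and BayesianModelSampling have moved between pgmpy releases, so they may need adjusting.

from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD, State        # location varies across pgmpy versions
from pgmpy.sampling import BayesianModelSampling   # older releases: from pgmpy.inference import BayesianModelSampling

# A two-node toy network, unrelated to the test fixture above.
toy = BayesianModel([('rain', 'wet_grass')])
toy.add_cpds(TabularCPD('rain', 2, [[0.7], [0.3]]),
             TabularCPD('wet_grass', 2, [[0.9, 0.2], [0.1, 0.8]],
                        evidence=['rain'], evidence_card=[2]))

sampler = BayesianModelSampling(toy)
prior = sampler.forward_sample(1000)                                    # unconditional samples
posterior = sampler.rejection_sample([State('rain', 1)], 500)           # samples consistent with rain=1
weighted = sampler.likelihood_weighted_sample([State('rain', 1)], 500)  # adds a '_weight' column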
Example #47
0
class BaseModel(object):
    """
    An object of this type holds the probabilistic graphical model: it includes its graph
    and its parameters (CPDs), as well as an object for running inference.

    Args:
        config_file_path (str) : path to the JSON file with the DAG description and
        its conditional probability tables.

        data (dict) : if no configuration file is available, a dictionary with the
        elements needed to initialize the object can be used instead.

    to-do : for now this only works with binary values.
	"""
    def __init__(self, config_file_path=None, data=None):
        self.config_file_path = config_file_path
        self.digraph = None
        self.pgmodel = None
        self.infer_system = None
        self.ebunch = None
        self.nodes = None
        self.variables_dict = dict()
        if config_file_path:
            with open(config_file_path) as json_file:
                data = json.load(json_file)
        if data.get('digraph'):
            self.ebunch = data['digraph']
            self.pgmodel = BayesianModel(self.ebunch)
            self.nodes = data.get('nodes', [])
            if self.nodes:
                self.pgmodel.add_nodes_from(self.nodes)
            self.init_graph(ebunch=self.ebunch, nodes=self.nodes)
        if data.get('cpdtables'):
            self.init_model(self.ebunch, data['cpdtables'])
            for table in self.pgmodel.get_cpds():
                logging.info(table)
        self.target = data['target']
        self.nature_variables = data['nature_variables']
        self.intervention_variables = data['interventions']

    def init_graph(self, ebunch, nodes=[], plot=True, graph_id='figures/dag'):
        """
        Builds the DAG with networkx's DiGraph using a list of edges.

        Args:
            ebunch (list) : a list containing the edges of the graph.
            plot (boolean) : a flag indicating whether to save an image of the graph
            using matplotlib.
            graph_id (str): the name used to identify the graph.
		"""
        self.digraph = nx.DiGraph(ebunch)
        for node in nodes:
            self.digraph.add_node(node)
        if plot: self.save_digraph_as_img(graph_id)

    def reset(self, pgmodel, ebunch, nodes=[]):
        """
        Method to replace the model and the graph. The inference system is also
        updated to match the new model. This method is used to build a dynamic
        model in which only the variables are kept.
		"""
        self.init_graph(ebunch, nodes=nodes, plot=False)
        for variable in pgmodel.nodes():
            self.variables_dict[variable] = [0, 1]
        self.ebunch = ebunch
        self.nodes = nodes
        self.pgmodel = pgmodel
        self.update_infer_system()

    def show_graph(self):
        """
        Uses matplotlib to display the model's causal graph.
		"""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.show()
        plt.clf()

    def init_model(self, ebunch, cpdtables, plot=False, pgm_id='pgm'):
        """
        Builds the PGM with pgmpy. For now it is a Bayesian model. Receives the
        list of edges and the CPD tables.

        Args:
            ebunch (list) : a list containing the edges of the graph.
            cpdtables (list) : an array of dictionaries, where each dictionary holds
            the information needed to create one conditional probability table.
            plot (boolean) : a flag indicating whether to save an image of the graph
            using matplotlib.
            pgm_id (str): the name used to identify the graph.
		"""
        for cpdtable in cpdtables:
            self.variables_dict[cpdtable['variable']] = [\
             _ for _ in range(cpdtable['variable_card'])]
            table = TabularCPD(variable=cpdtable['variable'],\
               variable_card=cpdtable['variable_card'],\
               values=cpdtable['values'],\
               evidence_card=cpdtable.get('evidence_card'),\
               evidence=cpdtable.get('evidence'))
            if cpdtable.get('evidence'):
                table.reorder_parents(sorted(cpdtable.get('evidence')))
            self.pgmodel.add_cpds(table)
        if not self.pgmodel.check_model():
            raise ValueError("Error with CPDTs")
        self.update_infer_system()
        if plot: self.save_pgm_as_img(pgm_id)

    def update_infer_system(self):
        """
        Updates the inference system so that it is consistent with the pgm.
        Uses VariableElimination.
		"""
        self.infer_system = VariableElimination(self.pgmodel)

    def get_variable_values(self, variable):
        """
        Returns a list of the values that a variable can take.
		"""
        return self.variables_dict.get(variable)

    def get_target_variable(self):
        """
        Returns a list with the target variables.
		"""
        return self.target

    def get_intervention_variables(self):
        """
        Returns a list with the variables that can be intervened on.
		"""
        return self.intervention_variables

    def get_nature_variables(self):
        """
        Returns a list with the variables that nature moves.
		"""
        return self.nature_variables

    def get_ebunch(self):
        """
        Returns the list of edges of the model.
		"""
        return self.ebunch

    def get_nodes(self):
        """
        Returns the list of isolated nodes of the model.
		"""
        return self.nodes

    def get_nature_var_prob(self, nature_variable):
        """
        Returns a list with the probabilities of the values of the nature
        variable given as an argument.

        Args:
            nature_variable (str) : the name of the variable.
		"""
        if nature_variable in self.nature_variables:
            return np.squeeze(
                self.pgmodel.get_cpds(nature_variable).get_values())

    def conditional_probability(self, variable, evidence):
        """
        Computes the probability of every value of a variable given the
        evidence, using the variable elimination method.
		"""
        return self.infer_system.query([variable], \
         evidence=evidence, show_progress=False)

    def make_inference(self, variable, evidence):
        """
        Runs the inference engine to obtain the value of a variable given the
        evidence in a dictionary.

        Args:
            variable (str) : name of the variable to infer.
            evidence (dict) : a dictionary with the evidence from other variables, of the form {variable :  value}.
		"""
        return self.infer_system.map_query([variable],\
                                           evidence=evidence, show_progress=False)[variable]

    def save_digraph_as_img(self, filename):
        """
        Helper method to save the networkx DAG as an image.
		"""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.savefig(filename)
        plt.show()
        plt.clf()

    def save_pgm_as_img(self, filename):
        """
        Helper method to save the pgmpy DAG as an image.
		"""
        nx.draw(self.digraph, with_labels=True)
        plt.show()
        plt.savefig(filename)
        plt.clf()

    def get_graph_toposort(self):
        """
        Returns a list with the variables in the topological order of the DAG.
		"""
        return list(nx.topological_sort(self.digraph))

    def get_nodes_and_predecessors(self):
        """
        Returns an array of (node, sorted predecessors) pairs.
		"""
        return { node : sorted(self.digraph.predecessors(node)) \
         for node in self.digraph.nodes
        }

    def get_number_of_values(self, variable):
        """
        to-do : a method that returns how many possible values a variable has,
        and perhaps the corresponding values as well.
		"""
        return len(self.variables_dict.get(variable, []))

    def get_joint_prob_observation(self, observation):
        """
        Returns the probability of an observation.
		"""
        prob = self.infer_system.query(variables=list(observation.keys()),
                                       joint=True,
                                       show_progress=False)
        variables = prob.variables
        values = prob.values
        for i in range(len(variables)):
            value = observation[variables[i]]
            values = values[value]
        return values
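To make the constructor above concrete, here is a rough sketch (not from the original project) of the kind of dictionary it expects. Only the key names ('digraph', 'nodes', 'cpdtables', 'target', 'nature_variables', 'interventions') are taken from __init__ and init_model; the variable names are invented for illustration.

# Hypothetical configuration for BaseModel; only the key names come from the class above.
example_config = {
    'digraph': [('Treatment', 'Outcome'), ('Noise', 'Outcome')],
    'nodes': [],  # isolated nodes, if any
    'cpdtables': [
        {'variable': 'Treatment', 'variable_card': 2, 'values': [[0.5], [0.5]]},
        {'variable': 'Noise', 'variable_card': 2, 'values': [[0.8], [0.2]]},
        {'variable': 'Outcome', 'variable_card': 2,
         'values': [[0.9, 0.6, 0.7, 0.1],
                    [0.1, 0.4, 0.3, 0.9]],
         'evidence': ['Treatment', 'Noise'], 'evidence_card': [2, 2]},
    ],
    'target': ['Outcome'],
    'nature_variables': ['Noise'],
    'interventions': ['Treatment'],
}

# model = BaseModel(data=example_config)
# print(model.make_inference('Outcome', {'Treatment': 1, 'Noise': 0}))
# Note: init_graph() saves a figure to 'figures/dag' by default, so matplotlib and a
# 'figures' directory must be available before constructing the object this way.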
Example #48
0
def generateWysiwygData(samplesize=4000, filename="data/wysiwygdata4.csv"):
    ''' We define a Bayesian model based on the WYSIWYG model from the thesis.
    There are 6 C variables and 6 X variables. For both C and X the first four are discrete variables,
    the other two continuous. The variable C1 causally influences Y to ensure a certain level of
    group unfairness in the data.'''

    wysiwygmodel = BayesianModel([('A', 'C1'), ('A', 'C2'), ('A', 'C3'),
                                  ('A', 'C4'), ('C1', 'Y'), ('Y', 'C2'),
                                  ('Y', 'C3'), ('Y', 'C4'), ('A', 'X1'),
                                  ('A', 'X2'), ('A', 'X3'), ('A', 'X4'),
                                  ('Y', 'X1'), ('Y', 'X2'), ('Y', 'X3'),
                                  ('Y', 'X4')])

    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])

    cpd_y = TabularCPD(variable='Y',
                       variable_card=2,
                       values=[[0.65], [0.4], [0.35], [0.6]],
                       evidence=['C1'],
                       evidence_card=[2])

    cpd_c1 = TabularCPD(variable='C1',
                        variable_card=2,
                        values=[[0.85, 0.2], [0.15, 0.8]],
                        evidence=['A'],
                        evidence_card=[2])

    cpd_c2 = TabularCPD(variable='C2',
                        variable_card=4,
                        values=[[0.23, 0.27, 0.25, 0.20],
                                [0.35, 0.23, 0.24, 0.15],
                                [0.22, 0.27, 0.25, 0.25],
                                [0.20, 0.23, 0.26, 0.40]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_c3 = TabularCPD(variable='C3',
                        variable_card=2,
                        values=[[0.52, 0.49, 0.5, 0.45],
                                [0.48, 0.51, 0.5, 0.55]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_c4 = TabularCPD(variable='C4',
                        variable_card=4,
                        values=[[0.22, 0.25, 0.25, 0.37],
                                [0.23, 0.25, 0.26, 0.21],
                                [0.23, 0.25, 0.25, 0.22],
                                [0.32, 0.25, 0.24, 0.20]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_x1 = TabularCPD(variable='X1',
                        variable_card=2,
                        values=[[0.57, 0.48, 0.52, 0.38],
                                [0.43, 0.52, 0.48, 0.62]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_x2 = TabularCPD(variable='X2',
                        variable_card=4,
                        values=[[0.24, 0.28, 0.26, 0.19],
                                [0.38, 0.22, 0.24, 0.15],
                                [0.20, 0.28, 0.26, 0.23],
                                [0.18, 0.22, 0.24, 0.43]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_x3 = TabularCPD(variable='X3',
                        variable_card=2,
                        values=[[0.54, 0.48, 0.52, 0.4],
                                [0.46, 0.52, 0.48, 0.6]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_x4 = TabularCPD(variable='X4',
                        variable_card=4,
                        values=[[0.20, 0.25, 0.24, 0.40],
                                [0.21, 0.25, 0.28, 0.21],
                                [0.21, 0.25, 0.24, 0.21],
                                [0.38, 0.25, 0.24, 0.18]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    wysiwygmodel.add_cpds(cpd_a, cpd_c1, cpd_c2, cpd_c3, cpd_c4, cpd_x1,
                          cpd_x2, cpd_x3, cpd_x4, cpd_y)
    datasamples = BayesianModelSampling(wysiwygmodel)
    discframe = datasamples.forward_sample(samplesize)
    AY = discframe[["A", "Y"]]

    C5 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="C5",
                          meana0=1,
                          meana1=1.2,
                          covy0=[1],
                          covy1=[0.9])
    C6 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="C6",
                          meana0=2,
                          meana1=1.8,
                          covy0=[1],
                          covy1=[0.95])

    X5 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="X5",
                          meana0=1.1,
                          meana1=1.4,
                          covy0=[1.1],
                          covy1=[0.95])
    X6 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="X6",
                          meana0=1.9,
                          meana1=1.5,
                          covy0=[1],
                          covy1=[1.1])

    discframe = pd.concat([discframe, C5, C6, X5, X6], axis=1)
    discframe.to_csv(path_or_buf=filename)
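samplecontinuous is not defined in this snippet. Purely as an illustration consistent with the call sites above (a continuous column whose mean depends on A and whose variance depends on Y), it might look roughly like this:

import numpy as np
import pandas as pd

def samplecontinuous(AY, samplesize, contatt, meana0, meana1, covy0, covy1):
    # Hypothetical stand-in for the helper used above; the real implementation is not shown.
    # Assumes the sampled A and Y columns are integer coded (0/1).
    means = np.where(AY['A'].values == 0, meana0, meana1)
    variances = np.where(AY['Y'].values == 0, covy0[0], covy1[0])
    draws = np.random.normal(loc=means, scale=np.sqrt(variances), size=samplesize)
    return pd.DataFrame({contatt: draws})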
Example #49
0
def generateWysiwygFIData(samplesize=4000, filename="data/preFIData.csv"):
    ''' The Bayesian network that was used in the FI experiment.
    The edges between X and Y are flipped from the previous models,
    so X causally influences Y. The D variables are added to more closely approximate
    the experiments from the 'Fair Inference on Outcomes' paper. '''
    wysiwygmodel = BayesianModel([('A', 'C1'), ('A', 'C2'), ('A', 'C3'),
                                  ('A', 'C4'), ('Y', 'C2'), ('Y', 'C3'),
                                  ('Y', 'C4'), ('A', 'X1'), ('A', 'X2'),
                                  ('A', 'X3'), ('A', 'X4'), ('X1', 'Y'),
                                  ('X2', 'Y'), ('X3', 'Y'), ('X4', 'Y'),
                                  ('D1', 'X1'), ('D1', 'X2'), ('D2', 'X3'),
                                  ('D3', 'X4')])

    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])

    cpd_d1 = TabularCPD(variable='D1',
                        variable_card=2,
                        values=[[0.45], [0.55]])

    cpd_d2 = TabularCPD(variable='D2',
                        variable_card=4,
                        values=[[0.22], [0.24], [0.28], [0.26]])
    cpd_d3 = TabularCPD(variable='D3',
                        variable_card=2,
                        values=[[0.54], [0.46]])

    ydists = computeYDist()

    cpd_y = TabularCPD(variable='Y',
                       variable_card=2,
                       values=[ydists[0], ydists[1]],
                       evidence=['X1', 'X3', 'X2', 'X4'],
                       evidence_card=[2, 2, 4, 4])

    cpd_c1 = TabularCPD(variable='C1',
                        variable_card=2,
                        values=[[0.85, 0.2], [0.15, 0.8]],
                        evidence=['A'],
                        evidence_card=[2])

    cpd_c2 = TabularCPD(variable='C2',
                        variable_card=4,
                        values=[[0.23, 0.27, 0.25, 0.20],
                                [0.35, 0.23, 0.24, 0.15],
                                [0.22, 0.27, 0.25, 0.25],
                                [0.20, 0.23, 0.26, 0.40]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_c3 = TabularCPD(variable='C3',
                        variable_card=2,
                        values=[[0.52, 0.49, 0.5, 0.45],
                                [0.48, 0.51, 0.5, 0.55]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_c4 = TabularCPD(variable='C4',
                        variable_card=4,
                        values=[[0.22, 0.25, 0.25, 0.37],
                                [0.23, 0.25, 0.26, 0.21],
                                [0.23, 0.25, 0.25, 0.22],
                                [0.32, 0.25, 0.24, 0.20]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_x1 = TabularCPD(
        variable='X1',
        variable_card=2,
        values=[
            [0.38, 0.40, 0.60, 0.62],  #GOOD
            [0.62, 0.60, 0.40, 0.38]
        ],
        evidence=['A', 'D1'],
        evidence_card=[2, 2])

    cpd_x2 = TabularCPD(
        variable='X2',
        variable_card=4,
        values=[
            [0.30, 0.28, 0.15, 0.14],
            [0.24, 0.26, 0.30, 0.32],  #GOOD 2
            [0.16, 0.18, 0.38, 0.40],  #GOOD 1
            [0.30, 0.28, 0.17, 0.14]
        ],
        evidence=['A', 'D1'],
        evidence_card=[2, 2])

    cpd_x3 = TabularCPD(
        variable='X3',
        variable_card=2,
        values=[[0.64, 0.62, 0.62, 0.63, 0.38, 0.35, 0.35, 0.37],
                [0.36, 0.38, 0.38, 0.37, 0.62, 0.65, 0.65, 0.63]],  #GOOD
        evidence=['A', 'D2'],
        evidence_card=[2, 4])

    cpd_x4 = TabularCPD(
        variable='X4',
        variable_card=4,
        values=[
            [0.25, 0.27, 0.07, 0.09],
            [0.36, 0.34, 0.64, 0.62],  #GOOD1
            [0.25, 0.27, 0.07, 0.09],
            [0.14, 0.12, 0.22, 0.20]
        ],  #GOOD2
        evidence=['A', 'D3'],
        evidence_card=[2, 2])

    wysiwygmodel.add_cpds(cpd_a, cpd_c1, cpd_c2, cpd_c3, cpd_c4, cpd_x1,
                          cpd_x2, cpd_x3, cpd_x4, cpd_y, cpd_d1, cpd_d2,
                          cpd_d3)
    datasamples = BayesianModelSampling(wysiwygmodel)
    discframe = datasamples.forward_sample(samplesize)
    AY = discframe[["A", "Y"]]

    C5 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="C5",
                          meana0=1,
                          meana1=1.2,
                          covy0=[1],
                          covy1=[0.9])
    C6 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="C6",
                          meana0=2,
                          meana1=1.8,
                          covy0=[1],
                          covy1=[0.95])

    X5 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="X5",
                          meana0=1.1,
                          meana1=1.4,
                          covy0=[1.1],
                          covy1=[0.95])
    X6 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="X6",
                          meana0=1.9,
                          meana1=1.5,
                          covy0=[1],
                          covy1=[1.1])

    discframe = pd.concat([discframe, C5, C6, X5, X6], axis=1)
    ndf = discframe.reindex(axis=1,
                            labels=[
                                'A', 'Y', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6',
                                'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'D1', 'D2',
                                'D3'
                            ])
    ndf.to_csv(path_or_buf=filename)
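computeYDist() is likewise not shown here. A placeholder that at least has the right shape for the Y CPD above (two rows of 2*2*4*4 = 64 entries whose columns sum to one) could be sketched as:

import numpy as np

def computeYDist():
    # Hypothetical stand-in: the real function presumably encodes how X1, X3, X2 and X4
    # jointly determine Y. Here we only make sure the CPD shape and normalization are valid.
    n_cols = 2 * 2 * 4 * 4  # product of evidence_card=[2, 2, 4, 4]
    p_y1 = np.linspace(0.2, 0.8, n_cols)  # arbitrary pattern, for illustration only
    return [list(1.0 - p_y1), list(p_y1)]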
Example #50
0
class TestBayesianModelCPD(unittest.TestCase):

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

    def test_active_trail_nodes(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_l = TabularCPD('l', 2, values=np.random.rand(2, 2),
                           evidence=['g'], evidence_card=[2])
        cpd_s = TabularCPD('s', 2, values=np.random.rand(2, 2),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, values=np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, values=np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')

        self.model.add_node('B')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')

    def test_add_single_cpd(self):
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        cpd_d = TabularCPD('d', 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_l = TabularCPD('l', 2, values=np.random.rand(2, 2),
                           evidence=['g'], evidence_card=[2])
        cpd_s = TabularCPD('s', 2, values=np.random.rand(2, 2),
                           evidence=['i'], evidence_card=[2])

        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])

        cpd_s = TabularCPD('s', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])

        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['g'], evidence_card=[2])

        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertRaises(ValueError, self.G.check_model)

        cpd_d = TabularCPD('d', 2, values=[[0.8, 0.2]])
        cpd_i = TabularCPD('i', 2, values=[[0.7, 0.3]])
        self.G.add_cpds(cpd_d, cpd_i)

        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 's'], evidence_card=[2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['l'], evidence_card=[2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['d'], evidence_card=[2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                                    [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                           evidence=['g', 'd', 'i'], evidence_card=[2, 2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def test_check_model2(self):
        cpd_s = TabularCPD('s', 2, values=np.array([[0.5, 0.3],
                                                    [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])
        self.G.add_cpds(cpd_s)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_s)

        cpd_g = TabularCPD('g', 2, values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.3, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3],
                                                    [0.1, 0.7]]),
                           evidence=['g'], evidence_card=[2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def tearDown(self):
        del self.G
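The d-separation behaviour tested above can be reproduced in a few standalone lines. A minimal sketch follows; method names such as is_active_trail match the pgmpy version these tests target and differ in newer releases.

from pgmpy.models import BayesianModel

student = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')])

# 'd' and 'i' are marginally independent: the collider at 'g' blocks the path...
print(student.is_active_trail('d', 'i'))                # False
# ...but observing the collider 'g' activates it.
print(student.is_active_trail('d', 'i', observed='g'))  # True
# Nodes reachable from 'd' via active trails once 'g' is observed.
print(sorted(student.active_trail_nodes('d', observed='g')))  # ['d', 'i', 's']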
Example #51
0
def network_construction():
    """
    Construction of the Bayesian network.

    input/output: none
    If the message "check the Bayesian network again" is printed, please check the model parameters again!

    Symbol description:
        mode:    CS: compressor surge; CF: compressor fouling; CC: compressor erosion/corrosion; CI: compressor inlet icing;
                 TF: turbine fouling; TC: turbine erosion/corrosion; TD: turbine blade mechanical damage; BF: combustor fault;
                 BP: combustion pulsation; HW: high wheel-space temperature; HB: large blade-path temperature spread
        symptom: v: humming noise; s: speed fluctuation; f1: compressor pressure fluctuation; dp: compressor inlet pressure differential;
                 r: pressure ratio; m1: compressor inlet flow; m2: turbine outlet flow; t2: compressor outlet temperature;
                 p2: compressor outlet pressure; t4: turbine exhaust temperature; f2: combustor pressure fluctuation;
                 d: exhaust temperature spread; ce: compressor efficiency; te: turbine efficiency; fi: air filter element icing;
                 htw: wheel-space temperature above the limit at some point during operation;
                 htb: blade path (BPT) temperature deviation from the mean exceeds the alarm limit
    """

    fault_model = BayesianModel([('CS', 'v'), ('CS', 's'), ('CS', 'f1'),
                                 ('CF', 'dp'), ('CF', 'm1'), ('CF', 'r'),
                                 ('CF', 'ce'), ('CC', 'm1'), ('CC', 'ce'),
                                 ('ce', 'p2'), ('ce', 't2'), ('TF', 'te'),
                                 ('TF', 'm2'), ('TC', 'te'), ('TC', 'm2'),
                                 ('te', 'p2'), ('te', 't4'), ('BF', 'd'),
                                 ('BP', 'f2'), ('BP', 'd'), ('CI', 'fi'),
                                 ('TD', 'te'), ('HB', 'htb'), ('HW', 'htw')])
    # Defining the parameters (conditional probabilities).
    # Prior probabilities of the fault modes (11 fault modes in total)
    cs_cpd = TabularCPD(variable='CS', variable_card=2,
                        values=[[0.05, 0.95]])  # compressor surge
    cf_cpd = TabularCPD(variable='CF', variable_card=2,
                        values=[[0.2, 0.8]])  # compressor blade fouling
    cc_cpd = TabularCPD(variable='CC', variable_card=2,
                        values=[[0.1, 0.9]])  # compressor blade erosion/corrosion
    ci_cpd = TabularCPD(variable='CI', variable_card=2,
                        values=[[0.03, 0.97]])  # compressor inlet icing
    tf_cpd = TabularCPD(variable='TF', variable_card=2,
                        values=[[0.1, 0.9]])  # turbine blade fouling
    tc_cpd = TabularCPD(variable='TC', variable_card=2,
                        values=[[0.1, 0.9]])  # turbine blade erosion/corrosion
    td_cpd = TabularCPD(variable='TD', variable_card=2,
                        values=[[0.05, 0.95]])  # turbine blade mechanical damage
    bf_cpd = TabularCPD(variable='BF', variable_card=2,
                        values=[[0.1, 0.9]])  # combustor fault
    bp_cpd = TabularCPD(variable='BP', variable_card=2,
                        values=[[0.1, 0.9]])  # combustion pulsation
    hw_cpd = TabularCPD(variable='HW', variable_card=2,
                        values=[[0.1, 0.9]])  # high wheel-space temperature
    hb_cpd = TabularCPD(variable='HB', variable_card=2,
                        values=[[0.1, 0.9]])  # large blade-path temperature spread
    # Conditional probabilities of the fault symptoms (14 symptoms in total, 2 of which are efficiency indicators),
    # assigned using the noisy-OR principle. Parent nodes are treated as mutually independent; leak probability 0.01;
    # 0.9 = strong association; 0.8 = association; 0.7 = possible association; 0.6 = uncertain whether the association holds.
    ce_cpd = TabularCPD(variable='ce',
                        variable_card=2,
                        evidence=['CF', 'CC'],
                        evidence_card=[2, 2],
                        values=[[0.99, 0.1, 0.1, 0.0099],
                                [0.01, 0.9, 0.9, 0.9901]])  # efficiency anomaly mode
    te_cpd = TabularCPD(
        variable='te',
        variable_card=2,
        evidence=['TF', 'TC', 'TD'],
        evidence_card=[2, 2, 2],
        values=[[0.99, 0.1, 0.1, 0.0099, 0.1, 0.0099, 0.0099, 0.00099],
                [0.01, 0.9, 0.9, 0.9901, 0.9, 0.9901, 0.9901,
                 0.99901]])  # efficiency anomaly mode
    v_cpd = TabularCPD(variable='v',
                       variable_card=2,
                       evidence=['CS'],
                       evidence_card=[2],
                       values=[[0.99, 0.1], [0.01, 0.9]])
    s_cpd = TabularCPD(variable='s',
                       variable_card=2,
                       evidence=['CS'],
                       evidence_card=[2],
                       values=[[0.99, 0.1], [0.01, 0.9]])
    f1_cpd = TabularCPD(variable='f1',
                        variable_card=2,
                        evidence=['CS'],
                        evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    dp_cpd = TabularCPD(variable='dp',
                        variable_card=2,
                        evidence=['CF'],
                        evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    m1_cpd = TabularCPD(variable='m1',
                        variable_card=2,
                        evidence=['CF', 'CC'],
                        evidence_card=[2, 2],
                        values=[[0.99, 0.1, 0.2, 0.0198],
                                [0.01, 0.9, 0.8, 0.9802]])
    r_cpd = TabularCPD(variable='r',
                       variable_card=2,
                       evidence=['CF'],
                       evidence_card=[2],
                       values=[[0.99, 0.1], [0.01, 0.9]])
    t2_cpd = TabularCPD(variable='t2',
                        variable_card=2,
                        evidence=['ce'],
                        evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    p2_cpd = TabularCPD(variable='p2',
                        variable_card=2,
                        evidence=['ce', 'te'],
                        evidence_card=[2, 2],
                        values=[[0.99, 0.3, 0.2, 0.0594],
                                [0.01, 0.7, 0.8, 0.9406]])
    t4_cpd = TabularCPD(variable='t4',
                        variable_card=2,
                        evidence=['te'],
                        evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    m2_cpd = TabularCPD(variable='m2',
                        variable_card=2,
                        evidence=['TF', 'TC'],
                        evidence_card=[2, 2],
                        values=[[0.99, 0.2, 0.1, 0.0198],
                                [0.01, 0.8, 0.9, 0.9802]])
    f2_cpd = TabularCPD(variable='f2',
                        variable_card=2,
                        evidence=['BP'],
                        evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    d_cpd = TabularCPD(variable='d',
                       variable_card=2,
                       evidence=['BF', 'BP'],
                       evidence_card=[2, 2],
                       values=[[0.99, 0.2, 0.1, 0.0198],
                               [0.01, 0.8, 0.9, 0.9802]])
    fi_cpd = TabularCPD(variable='fi',
                        variable_card=2,
                        evidence=['CI'],
                        evidence_card=[2],
                        values=[[0.99, 0.1], [0.01, 0.9]])
    htb_cpd = TabularCPD(variable='htb',
                         variable_card=2,
                         evidence=['HB'],
                         evidence_card=[2],
                         values=[[0.99, 0.1], [0.01, 0.9]])
    htw_cpd = TabularCPD(variable='htw',
                         variable_card=2,
                         evidence=['HW'],
                         evidence_card=[2],
                         values=[[0.99, 0.1], [0.01, 0.9]])
    #Associating the parameters with the model structure.
    fault_model.add_cpds(cs_cpd, cf_cpd, cc_cpd, ci_cpd, tf_cpd, tc_cpd,
                         td_cpd, bf_cpd, bp_cpd, hb_cpd, hw_cpd, ce_cpd,
                         te_cpd, v_cpd, s_cpd, f1_cpd, dp_cpd, m1_cpd, r_cpd,
                         t2_cpd, p2_cpd, t4_cpd, m2_cpd, f2_cpd, d_cpd, fi_cpd,
                         htb_cpd, htw_cpd)
    # Checking if the cpds are valid for the model.
    try:
        fault_model.check_model()
    except ValueError:
        print('check the Bayesian network again')
    else:
        joblib.dump(fault_model, 'model/fault_model.pkl')  # save the model
    return fault_model
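A short usage sketch for the fault network built above (assuming the 'model' directory used by joblib.dump exists, and using pgmpy's VariableElimination; the evidence states below are chosen only to illustrate the call):

from pgmpy.inference import VariableElimination

fault_model = network_construction()
diagnoser = VariableElimination(fault_model)

# Posterior over compressor fouling (CF) given observed states of the inlet pressure
# differential (dp) and compressor efficiency (ce) symptom nodes.
posterior_cf = diagnoser.query(variables=['CF'], evidence={'dp': 0, 'ce': 0})
print(posterior_cf)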
Example #52
0
class TestDirectedGraphCPDOperations(unittest.TestCase):

    def setUp(self):
        self.graph = BayesianModel()

    def test_add_single_cpd(self):
        cpd = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                         evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd)
        self.assertListEqual(self.graph.get_cpds(), [cpd])

    def test_add_multiple_cpds(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd1, cpd2, cpd3])

    def test_remove_single_cpd(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1)
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_remove_single_cpd_string(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff')
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds_string(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff', 'grade')
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_get_cpd_for_node(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertEqual(self.graph.get_cpds('diff'), cpd1)
        self.assertEqual(self.graph.get_cpds('intel'), cpd2)
        self.assertEqual(self.graph.get_cpds('grade'), cpd3)

    def test_get_cpd_raises_error(self):
        cpd1 = TabularCPD('diff', 2, values=np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, values=np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                          evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertRaises(ValueError, self.graph.get_cpds, 'sat')

    def tearDown(self):
        del self.graph
Example #53
0
class Inference(object):
    """
    Base class for all inference algorithms.

    Converts BayesianModel and MarkovModel to a uniform representation so that inference
    algorithms can be applied. It also checks whether all the associated CPDs / Factors are
    consistent with the model.

    Initialize inference for a model.

    Parameters
    ----------
    model: pgmpy.models.BayesianModel or pgmpy.models.MarkovModel or pgmpy.models.NoisyOrModel
        model for which to initialize the inference object.

    Examples
    --------
    >>> from pgmpy.inference import Inference
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.factors import TabularCPD
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
    >>> intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
    >>> grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
    ...                                     [0.1, 0.1, 0.1, 0.1],
    ...                                     [0.8, 0.8, 0.8, 0.8]],
    ...                        evidence=['diff', 'intel'], evidence_card=[2, 2])
    >>> student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
    >>> model = Inference(student)

    >>> from pgmpy.models import MarkovModel
    >>> from pgmpy.factors import Factor
    >>> import numpy as np
    >>> student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles'),
    ...                        ('Charles', 'Debbie'), ('Debbie', 'Alice')])
    >>> factor_a_b = Factor(['Alice', 'Bob'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_b_c = Factor(['Bob', 'Charles'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_c_d = Factor(['Charles', 'Debbie'], cardinality=[2, 2], value=np.random.rand(4))
    >>> factor_d_a = Factor(['Debbie', 'Alice'], cardinality=[2, 2], value=np.random.rand(4))
    >>> student.add_factors(factor_a_b, factor_b_c, factor_c_d, factor_d_a)
    >>> model = Inference(student)
    """
    @StateNameInit()
    def __init__(self, model):
        self.model = model
        model.check_model()

        if isinstance(model, JunctionTree):
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            for node in model.nodes():
                cpd = model.get_cpds(node)
                cpd_as_factor = cpd.to_factor()
                self.cardinality[node] = cpd.variable_card

                for var in cpd.variables:
                    self.factors[var].append(cpd_as_factor)

        elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
            self.cardinality = model.get_cardinality()

            for factor in model.get_factors():
                for var in factor.variables:
                    self.factors[var].append(factor)

        elif isinstance(model, DynamicBayesianNetwork):
            self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
            self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
            cpd_inter = [
                model.get_cpds(node) for node in model.get_interface_nodes(1)
            ]
            self.interface_nodes = model.get_interface_nodes(0)
            self.one_and_half_model = BayesianModel(model.get_inter_edges() +
                                                    model.get_intra_edges(1))
            self.one_and_half_model.add_cpds(*(model.get_cpds(time_slice=1) +
                                               cpd_inter))
Example #54
0
class Inference(object):
    """
    Base class for all inference algorithms.

    Converts BayesianModel and MarkovModel to a uniform representation so that inference
    algorithms can be applied. It also checks whether all the associated CPDs / Factors are
    consistent with the model.

    Initialize inference for a model.

    Parameters
    ----------
    model: pgmpy.models.BayesianModel or pgmpy.models.MarkovModel or pgmpy.models.NoisyOrModel
        model for which to initialize the inference object.

    Examples
    --------
    >>> from pgmpy.inference import Inference
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.factors import TabularCPD
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
    >>> intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
    >>> grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
    ...                                     [0.1, 0.1, 0.1, 0.1],
    ...                                     [0.8, 0.8, 0.8, 0.8]],
    ...                        evidence=['diff', 'intel'], evidence_card=[2, 2])
    >>> student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
    >>> model = Inference(student)

    >>> from pgmpy.models import MarkovModel
    >>> from pgmpy.factors import Factor
    >>> import numpy as np
    >>> student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles'),
    ...                        ('Charles', 'Debbie'), ('Debbie', 'Alice')])
    >>> factor_a_b = Factor(['Alice', 'Bob'], cardinality=[2, 2], values=np.random.rand(4))
    >>> factor_b_c = Factor(['Bob', 'Charles'], cardinality=[2, 2], values=np.random.rand(4))
    >>> factor_c_d = Factor(['Charles', 'Debbie'], cardinality=[2, 2], values=np.random.rand(4))
    >>> factor_d_a = Factor(['Debbie', 'Alice'], cardinality=[2, 2], values=np.random.rand(4))
    >>> student.add_factors(factor_a_b, factor_b_c, factor_c_d, factor_d_a)
    >>> model = Inference(student)
    """

    def __init__(self, model):
        self.model = model
        model.check_model()

        if isinstance(model, JunctionTree):
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            for node in model.nodes():
                cpd = model.get_cpds(node)
                cpd_as_factor = cpd.to_factor()
                self.cardinality[node] = cpd.variable_card

                for var in cpd.variables:
                    self.factors[var].append(cpd_as_factor)

        elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
            self.cardinality = model.get_cardinality()

            for factor in model.get_factors():
                for var in factor.variables:
                    self.factors[var].append(factor)

        elif isinstance(model, DynamicBayesianNetwork):
            self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
            self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
            cpd_inter = [model.get_cpds(node) for node in model.get_interface_nodes(1)]
            self.interface_nodes = model.get_interface_nodes(0)
            self.one_and_half_model = BayesianModel(model.get_inter_edges() + model.get_intra_edges(1))
            self.one_and_half_model.add_cpds(*(model.get_cpds(time_slice=1) + cpd_inter))
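
Inference itself only builds these internal structures; queries are normally run through a
concrete subclass. A minimal sketch using VariableElimination on the `student` network from the
docstring above (query API in the older pgmpy style used throughout these examples):

from pgmpy.inference import VariableElimination
infer = VariableElimination(student)
posterior_g = infer.query(['grade'], evidence={'diff': 0, 'intel': 1})
print(posterior_g['grade'])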
Example #55
0
                   evidence_card=[2, 2])

cpd_l = TabularCPD(variable='L',
                   variable_card=2,
                   values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                   evidence=['G'],
                   evidence_card=[3])

cpd_s = TabularCPD(variable='S',
                   variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['I'],
                   evidence_card=[2])

# Associating the CPDs with the network
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
# defined and sum to 1.
model.check_model()

# In[21]:

# We can now call some methods on the BayesianModel object.
model.get_cpds()

# In[22]:

print(model.get_cpds('G'))

# In[23]:
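
As an illustrative sketch (assuming the usual D, I, G, L, S student structure set up earlier in
this example, not the original notebook cell), the fitted model can be queried through a
concrete inference class:

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)
print(infer.query(['G'], evidence={'D': 0, 'I': 1})['G'])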
Example #56
0
                       ('traffic_jam', 'long_queues'),
                       ('traffic_jam', 'late_for_school'),
                       ('getting_up_late', 'late_for_school')])
cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]])
cpd_traffic_jam = TabularCPD('traffic_jam', 2,
                             [[0.9, 0.6, 0.7, 0.1],
                              [0.1, 0.4, 0.3, 0.9]],
                             evidence=['rain', 'accident'],
                             evidence_card=[2, 2])
cpd_getting_up_late = TabularCPD('getting_up_late', 2,
                                 [[0.6], [0.4]])
cpd_late_for_school = TabularCPD('late_for_school', 2,
                                 [[0.9, 0.45, 0.8, 0.1],
                                  [0.1, 0.55, 0.2, 0.9]],
                                 evidence=['getting_up_late',
                                           'traffic_jam'],
                                 evidence_card=[2, 2])
cpd_long_queues = TabularCPD('long_queues', 2,
                             [[0.9, 0.2],
                              [0.1, 0.8]],
                             evidence=['traffic_jam'],
                             evidence_card=[2])
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam,
               cpd_getting_up_late, cpd_late_for_school,
               cpd_long_queues)
cbp_inference = CBP(model)
cbp_inference.map_query(variables=['traffic_jam', 'late_for_school'])
cbp_inference.map_query(variables=['traffic_jam'],
                        evidence={'accident': 1, 'long_queues': 0})
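
As a cross-check, the same MAP queries can be run with VariableElimination; a minimal sketch,
assuming the same `model` as above:

from pgmpy.inference import VariableElimination
ve_inference = VariableElimination(model)
ve_inference.map_query(variables=['traffic_jam', 'late_for_school'])
ve_inference.map_query(variables=['traffic_jam'],
                       evidence={'accident': 1, 'long_queues': 0})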
Example #57
0
def generateWysiwygFIDataOld(samplesize=4000, filename="data/preFIData.csv"):
    '''Old version of the Bayesian model for the Fair Inference experiment.
    Here Y still influences X, to make modelling Y simpler.
    This is not suitable for FI; the model is unused in the experiments in the final thesis.
    '''
    wysiwygmodel = BayesianModel([('A', 'C1'), ('A', 'C2'), ('A', 'C3'),
                                  ('A', 'C4'), ('C1', 'Y'), ('Y', 'C2'),
                                  ('Y', 'C3'), ('Y', 'C4'), ('A', 'X1'),
                                  ('A', 'X2'), ('A', 'X3'), ('A', 'X4'),
                                  ('Y', 'X1'), ('Y', 'X2'), ('Y', 'X3'),
                                  ('Y', 'X4'), ('D1', 'X1'), ('D1', 'X2'),
                                  ('D2', 'X3'), ('D3', 'X4')])

    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.5], [0.5]])

    cpd_d1 = TabularCPD(variable='D1',
                        variable_card=2,
                        values=[[0.45], [0.55]])

    cpd_d2 = TabularCPD(variable='D2',
                        variable_card=4,
                        values=[[0.22], [0.24], [0.28], [0.26]])
    cpd_d3 = TabularCPD(variable='D3',
                        variable_card=2,
                        values=[[0.54], [0.46]])

    cpd_y = TabularCPD(variable='Y',
                       variable_card=2,
                       values=[[0.7, 0.3],
                               [0.3, 0.7]],
                       evidence=['C1'],
                       evidence_card=[2])

    cpd_c1 = TabularCPD(variable='C1',
                        variable_card=2,
                        values=[[0.85, 0.2], [0.15, 0.8]],
                        evidence=['A'],
                        evidence_card=[2])

    cpd_c2 = TabularCPD(variable='C2',
                        variable_card=4,
                        values=[[0.23, 0.27, 0.25, 0.20],
                                [0.35, 0.23, 0.24, 0.15],
                                [0.22, 0.27, 0.25, 0.25],
                                [0.20, 0.23, 0.26, 0.40]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_c3 = TabularCPD(variable='C3',
                        variable_card=2,
                        values=[[0.52, 0.49, 0.5, 0.45],
                                [0.48, 0.51, 0.5, 0.55]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_c4 = TabularCPD(variable='C4',
                        variable_card=4,
                        values=[[0.22, 0.25, 0.25, 0.37],
                                [0.23, 0.25, 0.26, 0.21],
                                [0.23, 0.25, 0.25, 0.22],
                                [0.32, 0.25, 0.24, 0.20]],
                        evidence=['A', 'Y'],
                        evidence_card=[2, 2])

    cpd_x1 = TabularCPD(
        variable='X1',
        variable_card=2,
        values=[
            [0.38, 0.40, 0.42, 0.44, 0.57, 0.59, 0.60, 0.62],  #GOOD
            [0.62, 0.60, 0.58, 0.56, 0.43, 0.41, 0.40, 0.38]
        ],
        evidence=['A', 'Y', 'D1'],
        evidence_card=[2, 2, 2])

    cpd_x2 = TabularCPD(
        variable='X2',
        variable_card=4,
        values=[
            [0.30, 0.28, 0.27, 0.25, 0.17, 0.16, 0.15, 0.14],
            [0.24, 0.26, 0.26, 0.27, 0.29, 0.31, 0.30, 0.32],  #GOOD 2
            [0.16, 0.18, 0.20, 0.22, 0.35, 0.37, 0.38, 0.40],  #GOOD 1
            [0.30, 0.28, 0.27, 0.26, 0.19, 0.16, 0.17, 0.14]
        ],
        evidence=['A', 'Y', 'D1'],
        evidence_card=[2, 2, 2])

    cpd_x3 = TabularCPD(
        variable='X3',
        variable_card=2,
        values=[[
            0.64, 0.62, 0.62, 0.63, 0.60, 0.58, 0.58, 0.59, 0.40, 0.39, 0.39,
            0.38, 0.38, 0.35, 0.35, 0.37
        ],
                [
                    0.36, 0.38, 0.38, 0.37, 0.40, 0.42, 0.42, 0.41, 0.60, 0.61,
                    0.61, 0.62, 0.62, 0.65, 0.65, 0.63
                ]],  #GOOD
        evidence=['A', 'Y', 'D2'],
        evidence_card=[2, 2, 4])

    cpd_x4 = TabularCPD(
        variable='X4',
        variable_card=4,
        values=[
            [0.25, 0.27, 0.21, 0.23, 0.10, 0.12, 0.07, 0.09],
            [0.36, 0.34, 0.42, 0.40, 0.60, 0.58, 0.64, 0.62],  #GOOD1
            [0.25, 0.27, 0.21, 0.23, 0.10, 0.12, 0.07, 0.09],
            [0.14, 0.12, 0.16, 0.14, 0.20, 0.18, 0.22, 0.20]
        ],  #GOOD2
        evidence=['A', 'Y', 'D3'],
        evidence_card=[2, 2, 2])

    wysiwygmodel.add_cpds(cpd_a, cpd_c1, cpd_c2, cpd_c3, cpd_c4, cpd_x1,
                          cpd_x2, cpd_x3, cpd_x4, cpd_y, cpd_d1, cpd_d2,
                          cpd_d3)
    datasamples = BayesianModelSampling(wysiwygmodel)
    discframe = datasamples.forward_sample(samplesize)
    AY = discframe[["A", "Y"]]
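    # NOTE: samplecontinuous is a project-local helper that is not shown in this snippet; from its
    # arguments it appears to draw a continuous attribute conditioned on A and Y, with
    # group-specific means (meana0 / meana1) and covariances (covy0 / covy1).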

    C5 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="C5",
                          meana0=1,
                          meana1=1.2,
                          covy0=[1],
                          covy1=[0.9])
    C6 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="C6",
                          meana0=2,
                          meana1=1.8,
                          covy0=[1],
                          covy1=[0.95])

    X5 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="X5",
                          meana0=1.1,
                          meana1=1.4,
                          covy0=[1.1],
                          covy1=[0.95])
    X6 = samplecontinuous(AY,
                          samplesize=samplesize,
                          contatt="X6",
                          meana0=1.9,
                          meana1=1.5,
                          covy0=[1],
                          covy1=[1.1])

    discframe = pd.concat([discframe, C5, C6, X5, X6], axis=1)
    ndf = discframe.reindex(axis=1,
                            labels=[
                                'A', 'Y', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6',
                                'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'D1', 'D2',
                                'D3'
                            ])
    ndf.to_csv(path_or_buf=filename)
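
A usage sketch, assuming the data/ directory exists and the samplecontinuous helper is importable:

generateWysiwygFIDataOld(samplesize=4000, filename="data/preFIData.csv")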