Exemple #1
0
    def initialize_tabular_cpd(self, cid: BayesianModel) -> bool:
        """Fill the inherited TabularCPD with the deterministic table of ``self.f``.

        The table has one row per state name and one column per combination of
        parent values; an entry is 1 when ``self.f`` applied to that combination
        equals the row's state, 0 otherwise.

        :param cid: model queried for this variable's parents and their cardinalities
        :return: False when the parents are not instantiated yet, True once the
                 table has been initialized
        :raises Exception: if ``force_state_names`` is set but does not contain every
                           possible value of the variable
        """
        if not self.parents_instantiated(cid):
            return False

        reachable_values = self.possible_values(cid)
        if not self.force_state_names:
            state_names_list = reachable_values
        else:
            state_names_list = self.force_state_names
            # Forced state names must cover everything the function can output.
            uncovered = not set(reachable_values).issubset(state_names_list)
            if uncovered:
                raise Exception(
                    "variable {} can take value outside given state_names".
                    format(self.variable))

        card = len(state_names_list)
        evidence = cid.get_parents(self.variable)
        evidence_card = [cid.get_cardinality(parent) for parent in evidence]

        # One row per state, one column per parent-value combination.
        rows = []
        for state in state_names_list:
            row = []
            for combination in itertools.product(*self.parent_values(cid)):
                row.append(int(self.f(*combination) == state))
            rows.append(row)
        matrix = np.array(rows)

        super().__init__(self.variable,
                         card,
                         matrix,
                         evidence,
                         evidence_card,
                         state_names={self.variable: state_names_list})
        return True
Exemple #2
0
 def initialize_tabular_cpd(self, cid: BayesianModel) -> bool:
     """Initialize the TabularCPD with a matrix representing a uniform distribution.

     Every entry of the table is ``1 / self.variable_card``; the table has one
     column per combination of parent states.

     :param cid: model queried for this variable's parents, their CPDs and
                 their cardinalities
     :return: False if some parent has no CPD yet, True once the table has
              been initialized
     """
     parents = cid.get_parents(self.variable)
     # A parent without a CPD is not initialized yet, so give up for now.
     if not all(cid.get_cpds(parent) for parent in parents):
         return False
     parents_card = [cid.get_cardinality(p) for p in parents]
     # np.prod replaces the np.product alias, which NumPy 2.0 removed.
     # For a parentless variable the product of the empty list is 1.
     column_count = int(np.prod(parents_card))
     transition_matrix = np.ones(
         (self.variable_card, column_count)) / self.variable_card
     super().__init__(self.variable,
                      self.variable_card,
                      transition_matrix,
                      parents,
                      parents_card,
                      state_names=self.state_names)
     return True
Exemple #3
0
class Utilities(object):
    def __init__(self, file):
        """
        Sets up the node-family tables, resolves the json path and creates empty networks.

        :param file: file name handed to get_json_path, which replaces self.file
                     with the path it builds
        """
        self.header = ''
        self.dictionary = []
        self.keywords = ['BENS', 'MEMS', 'LANS', 'MOTOR', 'WORLD']
        # skeleton of the "Nodes" section of the json file
        self.standard_nodes = {
            'RONS': {'BENS': [], 'MEMS': []},
            'LANS': {'LANS': []},
            'LENS': {'MOTOR': [], 'WORLD': []},
        }
        # get_json_path overwrites self.file, so the raw name is only a placeholder
        self.file = file
        self.get_json_path(file)
        self.pgmpy_object = BayesianModel()
        self.networkx_object = nx.DiGraph()

    def get_nodes_in_family(self, family, attributes=False):
        nw_nodes = self.networkx_object.nodes()
        nw_dim = np.asarray(nw_nodes).ndim
        nodes = []
        for i, node in enumerate(nw_nodes):
            if nw_dim > 1:
                node = node[0]
            if family in node:
                nodes.append(node)
        return nodes

    def check_json_path(directory):
        """
        Checks whether the necessary project_repository directory exists.
        If not, creates it

        :param directory: the mother directory to search from downwards

        :type directory: string
        :rtype : none
        """
        if not os.path.exists(directory + '\project_repository\\'):
            os.makedirs(directory + '\project_repository\\')

    def get_json_path(self, file):
        """
        Creates a string containing the full path for the filename passed
        so it will be saved in the project_repository directory

        :param filename: filename without path or extension
        :return: a full path for the file

        :type filename :string
        :rtype : string
        """
        levels = 5
        common = os.path.dirname(os.path.realpath(__file__))
        for i in range(levels + 1):
            common = os.path.dirname(common)
            if 'peepo\peepo' not in common:
                break
        Utilities.check_json_path(common)
        self.file = str(common + '\project_repository\\' + file + '.json')
        print('in get_json_path :', self.file)

    def save_json(self, astring):
        """
        This helping function is only needed to have the json file  formatted in a user friendly way
        as the "dump" method does not provide a lot of possibilities to get it "pretty"

        :param file :the ull path of the json file
        :param astring: the name of the string containing the whole information
        :return: void
        :type file: string
        :type astring : string
        :rtype : void
        """
        text_file = open(str(self.file), "w")
        '''remove all LF written by the dump method'''
        astring = re.sub('\n', '', astring)
        '''For keywords -> insert LF and tabs'''
        astring = re.sub('\"Identification', '\n\"Identification', astring)
        astring = re.sub('\"Date', '\n\"Date', astring)
        astring = re.sub('\"Description', '\n\"Description', astring)
        astring = re.sub('\"Train_from', '\n\"Train_from', astring)
        astring = re.sub('\"Frozen', '\n\"Frozen', astring)
        astring = re.sub('\"Nodes', '\n\n\"Nodes', astring)
        astring = re.sub('\"RONS', '\n\t\t\"RONS', astring)
        astring = re.sub('\"BENS', '\n\t\t\t\"BENS', astring)
        astring = re.sub('\"MEMS', '\n\t\t\t\"MEMS', astring)
        astring = re.sub('\"LANS', '\n\t\t\"LANS', astring)
        astring = re.sub('\"LENS', '\n\t\t\"LENS', astring)
        astring = re.sub('\"MOTOR', '\n\t\t\t\"MOTOR', astring)
        astring = re.sub('\"WORLD', '\n\t\t\t\"WORLD', astring)
        astring = re.sub('\"Edges', '\n\n\"Edges', astring)
        astring = re.sub('\"CPDs', '\n\n\"CPDs', astring)
        astring = re.sub('{', '\n\t\t{', astring)
        text_file.write(astring)
        text_file.write('\n')
        text_file.close()

    def translation(self, astring, from_man_to_machine):
        """
        Given an array of tuples (a,b) in dictionary, returns the second element of the tuple where astring was found
        Is used to not loose the users node names as peepo generates standardized names for the corresponding node

        :param dictionary:an array of tuples -> is created in the method : get_network(file)
        :param astring: the name of the node passsed by the user
        :param from_man_to_machine: an integer -> 0 when we want the translation for the user give name to the standardized name, 1 the other way around
        :return: the corresponding standardized node name
        :type dictionary: np.array
        :type astring : string
        :rtype : string
        """
        source = 0
        target = 1
        if from_man_to_machine == 1:
            source = 1
            target = 0

        for index, item in enumerate(self.dictionary):
            if item[source] == astring:
                break
        return item[target]

    def clean_edge_list(self, edge_array, parent):
        '''the get functions for the edges, both in networx as pgmpy contain the parent name
            this function removes it from the list'''
        cleaned_list = []
        for a in edge_array:
            if a != parent:
                cleaned_list.append(a)
        return cleaned_list

    def clean_parent_list(self, parent_array, child):
        '''the get functions for the edges, both in networx as pgmpy contain the parent name
            this function removes it from the list'''
        cleaned_list = []
        for i, a in enumerate(parent_array):
            if a[0] != child:
                cleaned_list.append(a[0])
        return cleaned_list

    def get_edges(self):
        """
        Creates a dictionary with a node as a key and an array with its child as value
        (the methods get_child give generally a list of tuples (parent,child)

        :param  pgmpy_object: the pgmpy network
        :return: a dictionary with the edges of all the node

        :type fpgmpy_object:adress
        :rtype :dictionary
                """
        edg = self.pgmpy_object.edges()
        edges = dict()
        [
            edges[str(t[0])].append(str(t[1])) if t[0] in list(edges.keys())
            else edges.update({str(t[0]): [str(t[1])]}) for t in edg
        ]
        return edges

    def get_nodes_and_attributes(self):
        """
        Creates an  array  of tuple with a node as element 0 and a dictionary with cardinalities and cpd as key's and
         the key cardinality returns an int
         the key cpd a 2 dimensional matrix

        :param  pgmpy_object: the pgmpy network
        :return: array  of tuple with a node as element 0 and a dictionary with cardinalities and cpd as key's

        :type  :pgmpy_object:adress
        :rtype :array of tuples
        """
        nodes = self.pgmpy_object.nodes()
        nod_and_attributes = []
        [
            nod_and_attributes.append((str(node), {
                'cardinality':
                int(self.pgmpy_object.get_cardinality(node)),
                'cpd':
                self.pgmpy_object.get_cpds(node).values.astype(float)
            })) for i, node in enumerate(nodes)
        ]
        #need to reshape the cpds when more than 1 parent
        for i, node in enumerate(nod_and_attributes):
            shape = nod_and_attributes[i][1]['cpd'].shape
            dimension = nod_and_attributes[i][1]['cpd'].ndim
            if dimension > 2:
                col = int(np.prod(shape) / shape[0])
                nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][
                    'cpd'].reshape(shape[0], col)
            nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][
                'cpd'].tolist()
        return nod_and_attributes

    def translate_pgmpy_to_digraph(self):
        """
        Rebuilds self.networkx_object as a new networkx digraph holding the nodes
        (with their cardinality and cpd attributes) and the edges of self.pgmpy_object.

        NOTE: the class defines a method with this name a second time further
        down; that later definition is the one in effect.

        :return: nothing, the result is stored in self.networkx_object
        """
        digraph = nx.DiGraph()
        self.networkx_object = digraph
        edge_list = self.pgmpy_object.edges()
        digraph.add_nodes_from(self.get_nodes_and_attributes())
        digraph.add_edges_from(edge_list)

    def update_networkx(self, networkx, dic, header):
        self.header = header
        self.dictionary = dic
        self.networkx_object = networkx

    def update_pgmpy(self, pgmpy, dic, header):
        self.header = header
        self.dictionary = dic
        self.pgmpy_object = pgmpy

    def save_pgmpy_network(self):
        """
                Saves the passed pgmpy_object class object in a json file
        """
        self.translate_pgmpy_to_digraph()
        self.save_network()
        return

    def translate_digraph_to_pgmpy(self, digraf):
        """
        Converts a pgmpy network into a networkx network

        :param  pgmpy_object: the pgmpy network
        :return networkx : networkx network

        :type  :pgmpy_object:adress
        :rtype :networkx:adress
        """
        self.pgmpy_object, x, y = self.get_pgmpy_network(from_object=True,
                                                         digraph=digraf)
        return self.pgmpy_object

    def translate_pgmpy_to_digraph(self):
        """
        Rebuilds self.networkx_object as a new networkx digraph holding the nodes
        (with their cardinality and cpd attributes) and the edges of self.pgmpy_object.

        NOTE: this is the second definition of this method in the class and
        therefore the one in effect.

        :return: nothing, the result is stored in self.networkx_object
        """
        digraph = nx.DiGraph()
        self.networkx_object = digraph
        edge_list = self.pgmpy_object.edges()
        digraph.add_nodes_from(self.get_nodes_and_attributes())
        digraph.add_edges_from(edge_list)

    def save_network(self):
        """
        Saves the passed networkx class object in a json file

        """
        data = self.get_empty_canvas()
        data["header"] = self.header
        nw_nodes = self.networkx_object.nodes(data=True)
        nw_edges = self.networkx_object.edges()
        keywords = self.keywords
        nodes = copy.deepcopy(
            self.standard_nodes
        )  #{'RONS': {'BENS': [], 'MEMS': []}, 'LANS': {'LANS': []}, 'LENS': {'MOTOR': [], 'WORLD': []}}
        edges = []
        cpds = []
        '''adding edges'''
        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            childs = []
            for k, edge in enumerate(nw_edges):
                if edge[0] == node_name:
                    childs.append(self.translation(edge[1], 1))
            if len(childs) != 0:
                edges.append({self.translation(node_name, 1): childs})

        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            cardinality = node[1]['cardinality']
            cpd = node[1]['cpd']
            for pseudonym in keywords:
                if pseudonym in node_name:
                    node_name_ = self.translation(node_name, 1)
                    if pseudonym == 'BENS' or pseudonym == 'MEMS':
                        nodes['RONS'][pseudonym].append(
                            [node_name_, cardinality])
                    if pseudonym == 'LANS':
                        nodes['LANS'][pseudonym].append(
                            [node_name_, cardinality])
                    if pseudonym == 'MOTOR' or pseudonym == 'WORLD':
                        nodes['LENS'][pseudonym].append(
                            [node_name_, cardinality])
            cpds.append({self.translation(node_name, 1): cpd})
        data['Nodes'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        data['header']['Date'] = datetime.datetime.now().strftime("%c")
        self.save_json(json.dumps(data))
        return

    def get_pgmpy_network(self, from_object=False, digraph=None):
        """
        Builds a new pgmpy BayesianModel in self.pgmpy_object, either from the json file
        (read through get_network()) or from a networkx digraph passed by the caller.
        Every node of the network has to carry the attributes 'cardinality' and 'cpd'.

        :param from_object: when False the network is read with get_network(), which also
                            refreshes self.dictionary and self.header; when True the
                            digraph argument is used and both are left untouched
        :param digraph: the networkx digraph to translate, only used when from_object is True
        :return: the pgmpy model, the dictionary as an array of tuples and the header

        :type from_object : bool
        :type digraph : networkx class object
        :rtype : pgmpy class object, array of tuples, dictionary

        CAUTION : the method does not perform a check() on the constructed DAG ! -> has to be done in the calling module
        """
        self.pgmpy_object = BayesianModel()
        if from_object:
            network = digraph
        else:
            network = self.get_network()[0]
        nw_nodes = list(network.nodes(data=True))
        nw_edges = list(network.edges())
        # Cardinalities are read from the node data rather than through the
        # ``network.node`` accessor, which networkx removed in release 2.4.
        cardinality_of = {
            name: attributes['cardinality']
            for name, attributes in nw_nodes
        }

        # adding nodes and edges
        for node_name, _ in nw_nodes:
            self.pgmpy_object.add_node(node_name)
            for edge in nw_edges:
                if edge[0] == node_name:
                    self.pgmpy_object.add_edge(node_name, edge[1])

        # adding the cpd's
        for node_name, attributes in nw_nodes:
            parent_nodes = self.clean_parent_list(network.in_edges(node_name),
                                                  node_name)
            cpd = attributes['cpd']
            cardinality_node = attributes['cardinality']
            cardinality_parents = [
                cardinality_of[parent] for parent in parent_nodes
            ]
            # Depending on the number of parents the TabularCPD constructor is
            # called differently: a node without parents gets no evidence.
            if not cardinality_parents:
                table = TabularCPD(variable=node_name,
                                   variable_card=cardinality_node,
                                   values=[cpd])
            else:
                table = TabularCPD(variable=node_name,
                                   variable_card=cardinality_node,
                                   values=cpd,
                                   evidence=parent_nodes,
                                   evidence_card=np.asarray(cardinality_parents))
            self.pgmpy_object.add_cpds(table)
        return self.pgmpy_object, self.dictionary, self.header

    def get_network(self):
        """
        Reads the json file self.file and translates its content into a networkx class object
        - The nodes in the object are renamed so they have a standardized signature
          (family keyword + "_" + position of the node within its family)
        - Creates a dictionary for the nodes in the form of an array of tuples : [(name defined by user, standard name)]

        The results are also stored in self.networkx_object, self.dictionary and self.header.

        :return: a networkx class object, dictionary as an array of tuples and the header of the json file
        :rtype : networkx class object, array of tuples, dictionary
        """
        self.dictionary = []
        self.networkx_object = nx.DiGraph()
        with open(self.file) as f:
            data = f.read()
        # remove the layout characters added when the file was saved
        data = data.replace('\n', '').replace('\t', '')
        data = json.loads(data)
        self.header = data['header']

        # feeding the graph with the nodes
        for families in data['Nodes'].values():
            for family, members in families.items():
                for position, (user_name, node_cardinality) in enumerate(members):
                    node = family + "_" + str(position)
                    # Keyword attributes are accepted by every networkx release;
                    # a positional attribute dict only works on networkx 1.x.
                    self.networkx_object.add_node(node,
                                                  cardinality=node_cardinality,
                                                  cpd=[])
                    self.dictionary.append((user_name, node))

        # feeding the graph with the edges
        edges = []
        for pair in data['Edges']:
            for parent, children in pair.items():
                for child in children:
                    parent_ = self.translation(parent, 0)
                    child_ = self.translation(child, 0)
                    edges.append((parent_, child_))
        self.networkx_object.add_edges_from(edges)

        # feeding the graph with the CPD's as node attributes
        for entry in data['CPDs']:
            for user_name, cpd in entry.items():
                node_ = self.translation(user_name, 0)
                if node_ not in self.networkx_object:
                    raise KeyError(node_)
                # add_node on an existing node only updates its attributes; this
                # avoids the ``G.node`` accessor that networkx 2.4 removed.
                self.networkx_object.add_node(node_, cpd=cpd)
        return self.networkx_object, self.dictionary, self.header

    def create_json_file(self, **kwargs):
        """
        EWAMPLE :

        A helping method if the user prefers to create the BN within the code

        :param case_name: the file name without path or extension where the json file will be saved
        :param : **kwargs takes the following variables:
                                                            description = kwargs.get('description', '')
                                                            train_from = kwargs.get('train_from', '')
                                                            cpds = kwargs.get('CPDs', [])
                                                            bens = kwargs.get('BENS',[])
                                                            mems = kwargs.get('MEMS', [])
                                                            lans = kwargs.get('LANS', [])
                                                            motors = kwargs.get('MOTORS', [])
                                                            world = kwargs.get('WORLD', [])
                                                            edges = kwargs.get('Edges', [])
                                                            frozen = kwargs.get('frozen',False)
        .
        .
        .
        :return: void

        :type case_name : string
        :type  :
        .
        .
        .
        :rtype : void
        """
        description = kwargs.get('description', '')
        train_from = kwargs.get('train_from', '')
        cpds = kwargs.get('CPDs', [])
        bens = kwargs.get('BENS', [])
        mems = kwargs.get('MEMS', [])
        lans = kwargs.get('LANS', [])
        motors = kwargs.get('MOTORS', [])
        world = kwargs.get('WORLD', [])
        edges = kwargs.get('Edges', [])
        frozen = kwargs.get('frozen', False)

        #json_tab_file_write = JSONTabIndentFileWriter( Case_name,5a)
        data = self.get_empty_canvas()
        '''       - the 3 next items are for tracking purpose only, not fundamentally necessary'''
        data["header"]['Identification'] = self.file
        data["header"]['Date'] = datetime.datetime.now().strftime("%c")
        data["header"]['Description'] = description
        '''       - the next item gives a file containing possible training data (OPTIONAL)'''
        data["header"]['Train_from'] = train_from
        '''      Frozen tells whether or not the model can be considered as final i.e. is there still "training" needed'''
        data["header"]['Frozen'] = frozen
        '''       - the 5 next lines tells how much nodes  and their names + cardinality the model will start with
                    the names can be any valid python string'''
        bens = [['pooping', 2], ['peeing', 2], ['constipated', 2]]
        mems = [['havenotoiletpaper', 2]]
        lans = [['diarhea', 2], ['happypoop', 2]]
        motors = [['asshole1', 2], ['asshole2', 2]]
        world = [['toilet1', 2], ['toilet2', 2], ['garden1', 2],
                 ['garden2', 2], ['doctor', 2]]
        '''     - the next items describe the edges as a dictionary
                 -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs'''
        edges = []
        '''       !! in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty'''
        edges.append({'pooping': ['toilet1', 'diarhea', 'happypoop']})
        edges.append({'peeing': ['toilet2', 'garden1', 'garden2']})
        edges.append({'constipated': ['doctor']})
        edges.append({'havenotoiletpaper': ['garden1', 'garden2']})
        edges.append(
            {'diarhea': ['toilet1', 'doctor', 'asshole1', 'asshole2']})
        edges.append(
            {'happypoop': ['garden1', 'garden2', 'asshole1', 'asshole2']})
        '''       - the next items describe the CPD's  as a dictionary
                  -> the dictionary entry is the corresponding node'''
        cpds = []
        cpds.append({'pooping': [0.5, 0.5]})
        cpds.append({'peeing': [0.2, 0.8]})
        cpds.append({'constipated': [0.9, 0.1]})
        cpds.append({'havenotoiletpaper': [0.6, 0.4]})
        cpds.append({'happypoop': [[0.3, 0.8], [0.7, 0.2]]})
        cpds.append({'diarhea': [[0.8, 0.3], [0.2, 0.7]]})
        cpds.append({'toilet1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'asshole1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'asshole2': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'toilet2': [[0.5, 0.5], [0.5, 0.5]]})
        cpds.append({'doctor': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({
            'garden1': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                        [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
        })
        cpds.append({
            'garden2': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                        [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
        })
        '''       - feeding the data'''
        data["Nodes"]['RONS']['BENS'] = bens
        data["Nodes"]['RONS']['MEMS'] = mems
        data["Nodes"]['LANS']['LANS'] = lans
        data["Nodes"]['LENS']['MOTOR'] = motors
        data["Nodes"]['LENS']['WORLD'] = world
        data["Edges"] = edges
        data["CPDs"] = cpds
        ''' dumping to CASENAME file in jason format'''
        self.save_json(json.dumps(data))

        print("Json file for  - ", self.file, "  - created")

    def create_json_template(self):
        """
        A helping method if the  jason template in the project_repository ditectory has been deleted or corrupted

        :param : void
        :return: void

        :type : void
        :rtype : void
        """
        self.get_json_path(
            "Template"
        )  # creates the right path in which case_name will be saved
        data = self.get_empty_canvas()
        data['header']['Identification'] = self.file
        '''Filling some dummies to facilitate the user'''
        a_node = ['*', 0]
        an_edge = {'*': ['&', '&', '&']}
        a_cpd = {'*': [[0, 0, 0], [0, 0, 0]]}
        nodes = []
        edges = []
        cpds = []
        for i in range(0, 3):
            nodes.append(a_node)
            edges.append(an_edge)
            cpds.append(a_cpd)

        data['Nodes']['RONS']['BENS'] = nodes
        data['Nodes']['RONS']['MEMS'] = nodes
        data['Nodes']['LANS']['LANS'] = nodes
        data['Nodes']['LENS']['MOTOR'] = nodes
        data['Nodes']['LENS']['WORLD'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        ''' dumping to CASENAME file in jason format'''
        # with open(case_name, 'w') as f:
        #     json.dump(data, f, separators = (",",":"))
        self.save_json(json.dumps(data))
        print("Empty template created")

    def get_empty_canvas(self):
        """
         This method creates a json canvas which will be used for the several json creating method

         :param : void
         :return: a dictionary with the structure of the json file
         :type : non
         :rtype : dictionary
         """

        data = {
            'header': {
                'Identification': '',
                'Date': '',
                'Description': '',
                'Frozen': '',
                'Train_from': ''
            },
            'Nodes': {},
            'Edges': [],
            'CPDs': []
        }
        '''       - the 5 next lines tells how much nodes  and their names the model will start with
                    the names can be any valid python string'''
        bens = []
        mems = []
        lans = []
        motors = []
        world = []
        '''     - the next items describe the edges as a dictionary
                 -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs

                 !! in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty'''
        edges = []
        '''       - the next items describe the CPD's  as a dictionary
                  -> the dictionary entry is the corresponding node'''
        cpds = []
        '''       - feeding the data'''
        data['Nodes'] = {
            'RONS': {
                'BENS': bens,
                'MEMS': mems
            },
            'LANS': {
                'LANS': lans
            },
            'LENS': {
                'MOTOR': motors,
                'WORLD': world
            }
        }
        data['Edges'] = edges
        data['CPDs'] = cpds
        return data
Exemple #4
0
# CPD of node S: 2 states, conditioned on the 2-state node I
cpd_s = TabularCPD(variable='S',
                   variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['I'],
                   evidence_card=[2])

# Associate the conditional probability tables with the directed acyclic graph
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# Validate the model: check the network structure and the CPDs, and verify that
# the CPDs are correctly defined and sum to 1
model.check_model()
# Get the CPDs of the probabilistic graphical model
model.get_cpds()
# Get the probability table of node G
#print(model.get_cpds('G'))
# Get the cardinality of node G
model.get_cardinality('G')
# Get the local independencies of the whole Bayesian network
model.local_independencies(['D', 'I', 'S', 'G', 'L'])
from pgmpy.inference import VariableElimination
infer = VariableElimination(model)
# Marginalize out the other variables to get the probability of one variable
print(infer.query(['G'])['G'])
# Compute conditional probability distributions
print(infer.query(['G'], evidence={'D': 1, 'I': 1})['G'])
print(111, infer.query(['G'], evidence={'I': 1, 'L': 1, 'D': 1})['G'])
# Predict the state of a variable given the evidence
print(infer.map_query('G'))
print(infer.map_query('G', evidence={'D': 0, 'I': 1}))
print(infer.map_query('G', evidence={'D': 0, 'I': 1, 'L': 1, 'S': 1}))
Exemple #5
0
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-structure and CPD helper methods of ``BayesianModel``."""

    def setUp(self):
        """Create the three fixture models shared by the tests."""
        # Plain DAG without CPDs, used by the moralization/independence tests.
        generic_edges = [('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')]
        self.G = BayesianModel(generic_edges)

        # Model with a CPD attached to every node.
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_values = [[0.1] * 6, [0.1] * 6, [0.8] * 6]
        grade_cpd = TabularCPD('grade', 3, values=grade_values,
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # Structure-only model used by the ancestor tests.
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'),
                                 ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        """Moralizing ``G`` keeps all nodes and yields only the expected edges."""
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        allowed = [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')]
        for edge in moral_graph.edges():
            flipped = (edge[1], edge[0])
            # The moral graph is undirected, so accept either orientation.
            self.assertTrue(edge in allowed or flipped in allowed)

    def test_moral_graph_with_edge_present_over_parents(self):
        """Moralization when the two parents of ``d`` are already connected."""
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'),
                           ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        allowed = [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')]
        for edge in moral_graph.edges():
            flipped = (edge[1], edge[0])
            self.assertTrue(edge in allowed or flipped in allowed)

    def test_get_ancestors_of_success(self):
        """``_get_ancestors_of`` accepts a single node or a list of nodes."""
        of_g = self.G2._get_ancestors_of('g')
        of_d = self.G2._get_ancestors_of('d')
        of_i_and_l = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(of_g, {'d', 'i', 'g'})
        self.assertEqual(of_d, {'d'})
        self.assertEqual(of_i_and_l, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        """Asking for the ancestors of an unknown node raises ``ValueError``."""
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_get_cardinality(self):
        """Without an argument the cardinality of every node is returned."""
        expected = {'diff': 2, 'intel': 3, 'grade': 3}
        self.assertDictEqual(self.G1.get_cardinality(), expected)

    def test_get_cardinality_with_node(self):
        """With a node name only that node's cardinality is returned."""
        for node, card in (('diff', 2), ('intel', 3), ('grade', 3)):
            self.assertEqual(self.G1.get_cardinality(node), card)

    def test_local_independencies(self):
        """Check the local independencies reported for each node of ``G``."""
        expected_for_G = [
            ('a', Independencies(['a', ['b', 'c']])),
            ('c', Independencies(['c', ['a', 'd', 'e'], 'b'])),
            ('d', Independencies(['d', 'c', ['b', 'a']])),
            ('e', Independencies(['e', ['c', 'b', 'a'], 'd'])),
            ('b', Independencies(['b', 'a'])),
        ]
        for node, expected in expected_for_G:
            self.assertEqual(self.G.local_independencies(node), expected)
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        """Chain, fork and collider structures give the expected independencies."""
        given_y = Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y'))

        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), given_y)

        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))

        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        """``is_imap`` accepts a joint distribution and rejects a plain factor."""
        variables = ['diff', 'intel', 'grade']
        cards = [2, 3, 3]
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(variables, cards, val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanet(self):
        """The Markov blanket of ``y`` is its parents, children and co-parents."""
        edges = [('x', 'y'), ('z', 'y'), ('y', 'w'), ('y', 'v'), ('u', 'w'),
                 ('s', 'v'), ('w', 't'), ('w', 'm'), ('v', 'n'), ('v', 'q')]
        G = BayesianModel(edges)
        blanket = set(G.get_markov_blanket('y'))
        self.assertEqual(blanket, {'s', 'w', 'x', 'u', 'z', 'v'})

    def test_get_immoralities(self):
        """Immoralities are reported only for unconnected co-parents."""
        base = [('x', 'y'), ('z', 'y'), ('w', 'y')]

        G = BayesianModel([base[0], base[1], ('x', 'z'), base[2]])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})

        G1 = BayesianModel([base[0], base[1], ('z', 'x'), base[2]])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})

        G2 = BayesianModel([base[0], base[1], ('x', 'z'), base[2],
                            ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        """I-equivalence needs a BayesianModel argument and compares structure."""
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())

        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))

        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        """A copy starts equal to the original and is independent afterwards."""
        clone = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(clone.edges()))
        original_cpd_id = id(self.G1.get_cpds('diff'))
        cloned_cpd_id = id(clone.get_cpds('diff'))
        self.assertNotEqual(original_cpd_id, cloned_cpd_id)

        # Replacing a CPD on the original must not affect the copy.
        self.G1.remove_cpds('diff')
        self.G1.add_cpds(TabularCPD('diff', 2, values=[[0.3], [0.7]]))
        self.assertNotEqual(self.G1.get_cpds('diff'), clone.get_cpds('diff'))

        # Removing a node from the original must not affect the copy either.
        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(clone.edges()))

    def test_remove_node(self):
        """Removing a node also removes its CPD."""
        self.G1.remove_node('diff')
        remaining = sorted(self.G1.nodes())
        self.assertEqual(remaining, sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        """Removing several nodes removes each of their CPDs."""
        removed = ['diff', 'grade']
        self.G1.remove_nodes_from(removed)
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        for node in removed:
            self.assertRaises(ValueError, self.G1.get_cpds, node)

    def tearDown(self):
        """Drop the fixture models."""
        del self.G
        del self.G1
Exemple #6
0
class MyClass(object):
    """Score candidate BENS -> WORLD topologies by their average prediction error.

    For each candidate topology the networkx graph is rebuilt, placeholder
    CPDs are attached, learning data is derived from the colour table, the
    pgmpy model parameters are estimated and the model is scored over all
    colours. The best-scoring topology is remembered in ``best_topology``.
    """

    def __init__(self, case):
        """Create an empty search state.

        :param case: identifier forwarded to ``Utilities``
        """
        self.case = case
        self.results = []  # one [entropy, error] pair per tested topology
        self.networx_test = nx.DiGraph()
        self.networx_fixed = nx.DiGraph()
        self.pgmpy_test = BayesianModel()
        self.networx = nx.DiGraph()
        self.pgmpy = BayesianModel()
        self.best_error = math.inf
        # [error, entropy, networkx DiGraph, loop]. The third slot holds a
        # graph *instance* so that draw() works even before any topology
        # has been tested.
        self.best_topology = [0, 0, nx.DiGraph(), 0]
        self.dictionary = []
        self.header = {}
        self.nodes_0 = []
        self.edges_0 = {}
        self.nodes = []
        self.edges = {}
        self.cpds = {}
        self.colors_dictionary = {}
        self.colors_table = []
        self.colors_cpd = []
        self.learning_data = {}
        # number_of_colors is the attribute the other methods read.
        self.number_of_colors = 0
        # Misspelled legacy name, kept only for backward compatibility.
        self.nummber_of_colors = 0
        self._util = Utilities(case)
        self._lat = Lattices(self._util)
        self.expected_result = [0, 0, 0]
        self.loop = 0

    def get_my_colors(self):
        """Build the colour table and colour CPD from the current BEN/MEM nodes.

        Sets ``colors_dictionary``, ``colors_table``, ``colors_cpd`` and
        ``number_of_colors`` (the number of columns of the colour table).
        """
        evidence = []
        cardinality = []
        for name, attributes in self.nodes:
            if 'BEN' in name or 'MEM' in name:
                evidence.append(name)
                cardinality.append(attributes['cardinality'])
        (self.colors_dictionary,
         self.colors_table,
         self.colors_cpd) = self.color_cpd('WORLD', 3, evidence, cardinality)
        self.number_of_colors = self.colors_table.shape[1]

    def color_cpd(self, var, card_var, evidence, cardinality):
        """Create the hand-written colour CPD for the given evidence nodes.

        :param var: name of the CPD variable
        :param card_var: cardinality of ``var`` (the value matrix built here
            always has 3 rows)
        :param evidence: names of the evidence nodes
        :param cardinality: cardinalities of the evidence nodes, same order
        :return: tuple ``(colors, table, cpd)`` where ``colors`` maps each
            evidence node to its row of ``table``
        """
        table = CPD.get_index_matrix(cardinality)
        hi = 1  # 0.999
        lo = 1 - hi
        third = 1. / 3.
        # np.prod([]) is the float 1.0; cast so it is usable as a dimension.
        n_columns = int(np.prod(cardinality))
        matrix = np.full((3, n_columns), third)

        has_0, has_1, has_2, has_3 = [
            'BENS_' + str(k) in evidence for k in range(4)
        ]
        # Only combinations that contain both BENS_1 and BENS_3 have a
        # hand-written table; every other combination stays uniform.
        if has_1 and has_3:
            if has_0 and has_2:
                matrix[0] = [
                    lo, lo, lo, lo, hi, lo, hi, lo,
                    lo, lo, lo, lo, hi, lo, hi, lo
                ]
                matrix[1] = [
                    hi, lo, hi, lo, lo, hi, lo, hi,
                    hi, lo, hi, lo, lo, hi, lo, hi
                ]
                matrix[2] = [
                    lo, hi, lo, hi, lo, lo, lo, lo,
                    lo, hi, lo, hi, lo, lo, lo, lo
                ]
            elif has_2:
                matrix[0] = [lo, lo, lo, lo, hi, lo, hi, lo]
                matrix[1] = [hi, lo, hi, lo, lo, hi, lo, hi]
                matrix[2] = [lo, hi, lo, hi, lo, lo, lo, lo]
            elif has_0:
                matrix[0] = [third, lo, hi, third, third, lo, hi, third]
                matrix[1] = [third, lo, lo, third, third, lo, lo, third]
                matrix[2] = [third, hi, lo, third, third, hi, lo, third]
            else:
                matrix[0] = [third, lo, hi, third]
                matrix[1] = [third, lo, lo, third]
                matrix[2] = [third, hi, lo, third]

        cpd = TabularCPD(variable=var,
                         variable_card=card_var,
                         values=matrix,
                         evidence=evidence,
                         evidence_card=cardinality)
        colors = {node: table[i] for i, node in enumerate(evidence)}
        return colors, table, cpd

    def test_topology(self, entropy):
        """Score the current topology over all colours and record the result.

        Appends ``[entropy, error]`` to ``results`` and updates
        ``best_error`` / ``best_topology`` when the average error is not
        worse than the best one seen so far.

        :param entropy: entropy value of the topology being tested
        """
        self.networx_test = copy.deepcopy(self.networx)
        self.pgmpy_test = self._util.translate_digraph_to_pgmpy(
            self.networx.copy())
        self.expected_result = [0, 0, 0]

        # The colour CPD does not change while iterating: flatten it once to
        # (states of the variable) x (colours).
        shape = self.colors_cpd.values.shape
        reshaped_cpd = self.colors_cpd.values.reshape(
            shape[0], int(np.prod(shape) / shape[0]))

        error = 0
        for color in range(self.number_of_colors):
            self.expected_result = reshaped_cpd[:, color]
            # Look the state up by node name. The rows of the colour table
            # follow the evidence order, so when a BENS node has been removed
            # row i no longer belongs to 'BENS_i'.
            for node, states in self.colors_dictionary.items():
                if not node.startswith('BENS_'):
                    continue
                if node not in self.networx_test.nodes():
                    continue
                cardinality = self.pgmpy_test.get_cardinality(node)
                self.pgmpy_test.get_cpds(
                    node).values = CPD.create_fixed_parent(
                        cardinality, state=int(states[color]))
            error += self.do_simple_inference()
        error /= self.number_of_colors
        self.results.append([entropy, error])
        if error <= self.best_error:
            self.best_error = error
            self.best_topology[0] = error
            self.best_topology[1] = entropy
            self.best_topology[2] = self.networx_test
            self.best_topology[3] = self.loop
        self.loop += 1

    def do_inference(self, models):
        """Return the summed result of ``process()`` over all given models.

        :param models: mapping whose values expose a ``process()`` method
        """
        return sum(model.process() for model in models.values())

    def do_simple_inference(self):
        """Return the total prediction-error size over all leaf nodes."""
        total_prediction_error_size = 0
        for node in self.pgmpy_test.get_leaves():
            prediction = self.predict(node)
            observation = self.sensory_input(node)
            total_prediction_error_size += self.error_size(
                prediction, observation)
        return total_prediction_error_size

    def predict(self, node):
        """
        Predicts the given leaf node (i.e. the observational node) based on the root nodes (i.e. the belief nodes)
        :param node: name of the node to predict
        :return: prediction for given observation variable, a prediction is a probability distribution
        :rtype: np.array
        """
        infer = VariableElimination(self.pgmpy_test)
        evidence = {
            name: state
            for name, state in self.get_root_nodes().items()
            if name != node
        }
        return infer.query(variables=[node], evidence=evidence)[node].values

    def sensory_input(self, name):
        """Return the fixed distribution observed for the node ``name``.

        ``name`` is matched against ``'WORLD_<i>'`` for every index ``i`` of
        ``self.expected_result``.

        :param name: node name
        :return: result of ``CPD.create_fixed_parent`` for the matching
            entry, or ``None`` when no entry matches
        """
        for i, expected in enumerate(self.expected_result):
            if name == 'WORLD_' + str(i):
                return CPD.create_fixed_parent(2, state=int(expected))
        return None

    def error(self, pred, obs):
        """
        Calculates the prediction error as the residual of subtracting the predicted inputs from the observed inputs
        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error
        :type pred : np.array
        :type obs : np.array
        :rtype : np.array
        """
        return obs - pred

    def error_size(self, pred, obs):
        """
        Calculates the size of the prediction error as the Kullback-Leibler divergence. This reflects the magnitude
        of the prediction error, i.e. how wrong the prediction was.
        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error size
        :type pred : np.array
        :type obs : np.array
        :rtype : float
        """
        return entropy(obs, pred)

    def get_root_nodes(self):
        """
        Returns the status of all root nodes of ``self.pgmpy_test``.
        :return: Dictionary containing all root nodes as keys and, as values, the index of the largest CPD entry
        :rtype dict
        """
        return {
            root: np.argmax(self.pgmpy_test.get_cpds(root).values)
            for root in self.pgmpy_test.get_roots()
        }

    def get_observations(self):
        """
        Returns the status of all leaf nodes of ``self.pgmpy_test``.
        :return: Dictionary containing all leaf nodes as keys and, as values, the index of the largest CPD entry
        :rtype dict
        """
        return {
            leaf: np.argmax(self.pgmpy_test.get_cpds(leaf).values)
            for leaf in self.pgmpy_test.get_leaves()
        }

    def estimate_parameters(self):
        """Re-estimate the CPD values of every LAN/MOTOR/WORLD node.

        The values are learned from ``self.learning_data`` with a BDeu prior
        and written into the CPDs of ``self.pgmpy``.
        """
        data = pd.DataFrame(data=self.learning_data)
        # NOTE(review): learning_data is a dict, so this is the number of
        # variables (keys), not the number of data rows - confirm that this
        # is the intended equivalent sample size.
        sample_size = len(self.learning_data)
        estimator = BayesianEstimator(self.pgmpy, data)
        for name, _ in self.nodes:
            if 'LAN' in name or 'MOTOR' in name or 'WORLD' in name:
                self.pgmpy.get_cpds(name).values = estimator.estimate_cpd(
                    name,
                    prior_type='BDeu',
                    equivalent_sample_size=sample_size).values

    def add_edges(self, topology):
        """Rebuild the edges of ``self.networx`` from a 0/1 topology matrix.

        ``topology[row][column] == 1`` adds the edge
        ``'BENS_<column>' -> 'WORLD_<row>'``. BENS nodes whose column sums to
        zero are removed from the graph first. ``self.nodes`` and
        ``self.edges`` are refreshed from the graph.

        :param topology: two-dimensional matrix of 0/1 entries
        """
        self.networx.remove_edges_from(self.edges)
        shape = np.asarray(topology).shape

        column_sums = np.sum(topology, axis=0)
        unused_bens = [
            'BENS_' + str(column)
            for column, total in enumerate(column_sums)
            if total == 0
        ]
        self.networx.remove_nodes_from(unused_bens)
        self.nodes = self.networx.nodes(data=True)

        for column in range(shape[1]):
            for row in range(shape[0]):
                if topology[row][column] == 1:
                    self.networx.add_edge('BENS_' + str(column),
                                          'WORLD_' + str(row))
        self.edges = self.networx.edges()

    def add_dummy_cpds(self):
        """Attach a placeholder ``'cpd'`` attribute to every node of the graph.

        BEN/MEM nodes get a uniform fixed-parent CPD; other nodes get a
        random child CPD, sized by the cardinalities of their parents (or the
        'orphan' variant when they have no incoming edge).
        """
        # Name -> attribute dict. Works whether nodes(data=True) returns a
        # list of pairs or a view, and avoids the removed Graph.node attribute.
        attributes_of = dict(self.networx.nodes(data=True))
        for name, attributes in self.nodes:
            cardinality = attributes['cardinality']
            if ('BEN' in name) or ('MEM' in name):
                attributes['cpd'] = CPD.create_fixed_parent(cardinality,
                                                            modus='uniform')
                continue
            incoming_edges = self.networx.in_edges(name)
            if len(incoming_edges) == 0:
                attributes['cpd'] = CPD.create_random_child(cardinality,
                                                            modus='orphan')
                continue
            card_parent = [
                attributes_of[edge[0]]['cardinality']
                for edge in incoming_edges
            ]
            attributes['cpd'] = CPD.create_random_child(cardinality,
                                                        card_parent)
        self.nodes = self.networx.nodes(data=True)

    def create_learning_data(self):
        """Fill ``self.learning_data`` with one list of values per node.

        BEN nodes receive their row of the colour table. A WORLD node
        receives row ``hue`` of the flattened colour CPD for every ``hue`` in
        0..2 whose digit occurs in the node name.
        """
        self.get_my_colors()
        self.learning_data = {}
        ben_names = [name for name, _ in self.nodes if "BEN" in name]
        world_names = [name for name, _ in self.nodes if "WORLD" in name]

        # colors_dictionary maps each evidence node to its own table row, so
        # the lookup stays correct even when MEM nodes are part of the
        # evidence.
        for name in ben_names:
            self.learning_data[name] = self.colors_dictionary[name].tolist()

        if world_names:
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            for name in world_names:
                for hue in range(3):
                    if str(hue) in name:
                        self.learning_data[name] = reshaped_cpd[
                            hue, :].tolist()

    def do_it(self, first_loop=200, last_loop=350):
        """Walk through the candidate topologies, score them and plot the outcome.

        Only topologies for which ``first_loop <= self.loop <= last_loop``
        are processed; the others are counted and skipped. The defaults
        reproduce the previously hard-coded (temporary) window.

        :param first_loop: lowest value of ``self.loop`` that is processed
        :param last_loop: highest value of ``self.loop`` that is processed
        :return: ``self.results``, one ``[entropy, error]`` pair per tested
            topology
        """
        (self.networx_fixed, self.dictionary,
         self.header) = self._util.get_network()
        self.networx = self.networx_fixed.copy()
        self.networx_test = self.networx_fixed.copy()
        print('Dictionary : ', self.dictionary)

        # treshold restrains the number of topologies: 0 -> all possible
        # topologies, 100 -> only the fully connected topology.
        possible_topologies = self._lat.get_possible_topologies(treshold=50)
        print("Possible topologies : ", len(possible_topologies))

        count = 0  # TEMPORARY
        for topology in possible_topologies:
            if self.loop < first_loop or self.loop > last_loop:
                self.loop += 1
                count += 1
                continue
            topology_entropy = topology[1]
            if topology_entropy == 0:
                continue  # safeguard
            print('Loop *-> ', self.loop + 1, ' of ', len(possible_topologies))

            # Each topology starts from a fresh copy of the fixed network.
            # The dummy CPDs must be rebuilt because the shape of a child's
            # CPD depends on its number of incoming edges.
            self.networx = self.networx_fixed.copy()
            self.add_edges(topology[0])
            self.add_dummy_cpds()
            self.nodes = self.networx.nodes(data=True)
            self.create_learning_data()

            # Convert the DiGraph to pgmpy, validate it, learn the CPDs from
            # the learning data and finally score the topology.
            self.pgmpy = self._util.translate_digraph_to_pgmpy(
                self.networx.copy())
            self.pgmpy.check_model()
            self.estimate_parameters()
            self.test_topology(topology_entropy)
            count += 1
        print('Check -> number of processed topologies in loop : ', count)

        # For the moment only BENs and WORLDs are handled; MEMS, LANS and
        # MOTORs are not covered by these methods yet.
        self.draw()
        self.draw_xy()
        return self.results

    def draw_xy(self):
        """Scatter-plot error against entropy and highlight the best topology."""
        # best_topology[3] holds self.loop, which also counts skipped
        # topologies and is therefore not an index into self.results. Find
        # the best entry by value instead; the last match wins, mirroring the
        # "<=" update in test_topology.
        best_entry = [self.best_topology[1], self.best_topology[0]]
        best_index = None
        for i, result in enumerate(self.results):
            if list(result) == best_entry:
                best_index = i

        x = [result[0] for result in self.results]
        y = [result[1] for result in self.results]
        s = [60 if i == best_index else 20 for i in range(len(self.results))]
        color = [
            "r" if i == best_index else "b"
            for i in range(len(self.results))
        ]
        plt.scatter(x, y, s=s, c=color, alpha=0.5)
        plt.xlabel("Complexity of topology")
        plt.ylabel("Average error over all colors")
        plt.show()

    def draw(self):
        """Draw the best topology found so far (TO REMOVE LATER)."""
        plt.figure(figsize=(10, 5))
        best_graph = self.best_topology[2]
        pos = nx.circular_layout(best_graph, scale=2)
        nx.draw(best_graph,
                pos,
                node_size=1200,
                node_color='lightblue',
                linewidths=0.25,
                font_size=10,
                font_weight='bold',
                with_labels=True)
        plt.text(1, 1, 'Topology nr. : ' + str(self.best_topology[3]))
        plt.show()
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-structure and CPD helper methods of ``BayesianModel``."""

    def setUp(self):
        """Create the three fixture models shared by the tests."""
        # Plain DAG without CPDs.
        generic_edges = [("a", "d"), ("b", "d"), ("d", "e"), ("b", "c")]
        self.G = BayesianModel(generic_edges)

        # Model with a CPD attached to every node.
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        grade_values = [[0.1] * 6, [0.1] * 6, [0.8] * 6]
        grade_cpd = TabularCPD("grade", 3, values=grade_values,
                               evidence=["diff", "intel"],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # Structure-only model used by the ancestor tests.
        g2_edges = [("d", "g"), ("g", "l"), ("i", "g"), ("i", "l")]
        self.G2 = BayesianModel(g2_edges)

    def test_moral_graph(self):
        """Moralizing ``G`` keeps all nodes and yields only the expected edges."""
        moral_graph = self.G.moralize()
        node_names = sorted(moral_graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c", "d", "e"])
        allowed = [("a", "b"), ("a", "d"), ("b", "c"), ("d", "b"), ("e", "d")]
        for edge in moral_graph.edges():
            flipped = (edge[1], edge[0])
            # The moral graph is undirected, so accept either orientation.
            self.assertTrue(edge in allowed or flipped in allowed)

    def test_moral_graph_with_edge_present_over_parents(self):
        """Moralization when the two parents of ``d`` are already connected."""
        edges = [("a", "d"), ("d", "e"), ("b", "d"), ("b", "c"), ("a", "b")]
        G = BayesianModel(edges)
        moral_graph = G.moralize()
        node_names = sorted(moral_graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c", "d", "e"])
        allowed = [("a", "b"), ("c", "b"), ("d", "a"), ("d", "b"), ("d", "e")]
        for edge in moral_graph.edges():
            flipped = (edge[1], edge[0])
            self.assertTrue(edge in allowed or flipped in allowed)

    def test_get_ancestors_of_success(self):
        """``_get_ancestors_of`` accepts a single node or a list of nodes."""
        of_g = self.G2._get_ancestors_of("g")
        of_d = self.G2._get_ancestors_of("d")
        of_i_and_l = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(of_g, {"d", "i", "g"})
        self.assertEqual(of_d, {"d"})
        self.assertEqual(of_i_and_l, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        """Asking for the ancestors of an unknown node raises ``ValueError``."""
        self.assertRaises(ValueError, self.G2._get_ancestors_of, "h")

    def test_get_cardinality(self):
        """Without an argument the cardinality of every node is returned."""
        expected = {"diff": 2, "intel": 3, "grade": 3}
        self.assertDictEqual(self.G1.get_cardinality(), expected)

    def test_get_cardinality_with_node(self):
        """With a node name only that node's cardinality is returned."""
        for node, card in (("diff", 2), ("intel", 3), ("grade", 3)):
            self.assertEqual(self.G1.get_cardinality(node), card)

    def test_local_independencies(self):
        """Check the local independencies reported for each node of ``G``."""
        expected_for_G = [
            ("a", Independencies(["a", ["b", "c"]])),
            ("c", Independencies(["c", ["a", "d", "e"], "b"])),
            ("d", Independencies(["d", "c", ["b", "a"]])),
            ("e", Independencies(["e", ["c", "b", "a"], "d"])),
            ("b", Independencies(["b", "a"])),
        ]
        for node, expected in expected_for_G:
            self.assertEqual(self.G.local_independencies(node), expected)
        self.assertEqual(self.G1.local_independencies("grade"),
                         Independencies())

    def test_get_independencies(self):
        """Chain, fork and collider structures give the expected independencies."""
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        chain_expected = Independencies(("X", "Z", "Y"), ("Z", "X", "Y"))
        self.assertEqual(chain.get_independencies(), chain_expected)

        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        fork_expected = Independencies(("X", "Z", "Y"), ("Z", "X", "Y"))
        self.assertEqual(fork.get_independencies(), fork_expected)

        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        collider_expected = Independencies(("X", "Z"), ("Z", "X"))
        self.assertEqual(collider.get_independencies(), collider_expected)

    def test_is_imap(self):
        """``is_imap`` accepts a joint distribution and rejects a plain factor."""
        variables = ["diff", "intel", "grade"]
        cards = [2, 3, 3]
        val = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
            0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128,
        ]
        JPD = JointProbabilityDistribution(variables, cards, val)
        fac = DiscreteFactor(["diff", "intel", "grade"], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanet(self):
        """The Markov blanket of ``y`` is its parents, children and co-parents."""
        edges = [("x", "y"), ("z", "y"), ("y", "w"), ("y", "v"), ("u", "w"),
                 ("s", "v"), ("w", "t"), ("w", "m"), ("v", "n"), ("v", "q")]
        G = DAG(edges)
        blanket = set(G.get_markov_blanket("y"))
        self.assertEqual(blanket, {"s", "w", "x", "u", "z", "v"})

    def test_get_immoralities(self):
        """Immoralities are reported only for unconnected co-parents."""
        base = [("x", "y"), ("z", "y"), ("w", "y")]

        G = BayesianModel([base[0], base[1], ("x", "z"), base[2]])
        self.assertEqual(G.get_immoralities(), {("w", "x"), ("w", "z")})

        G1 = BayesianModel([base[0], base[1], ("z", "x"), base[2]])
        self.assertEqual(G1.get_immoralities(), {("w", "x"), ("w", "z")})

        G2 = BayesianModel([base[0], base[1], ("x", "z"), base[2],
                            ("w", "x")])
        self.assertEqual(G2.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        """I-equivalence needs a BayesianModel argument and compares structure."""
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())

        G1 = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        G2 = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(G1.is_iequivalent(G2))

        G3 = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        """A copy starts equal to the original and is independent afterwards."""
        clone = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(clone.edges()))
        original_cpd_id = id(self.G1.get_cpds("diff"))
        cloned_cpd_id = id(clone.get_cpds("diff"))
        self.assertNotEqual(original_cpd_id, cloned_cpd_id)

        # Replacing a CPD on the original must not affect the copy.
        self.G1.remove_cpds("diff")
        self.G1.add_cpds(TabularCPD("diff", 2, values=[[0.3], [0.7]]))
        self.assertNotEqual(self.G1.get_cpds("diff"), clone.get_cpds("diff"))

        # Removing a node from the original must not affect the copy either.
        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(clone.edges()))

    def test_remove_node(self):
        """Removing a node also removes its CPD."""
        self.G1.remove_node("diff")
        remaining = sorted(self.G1.nodes())
        self.assertEqual(remaining, sorted(["grade", "intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")

    def test_remove_nodes_from(self):
        """Removing several nodes removes each of their CPDs."""
        removed = ["diff", "grade"]
        self.G1.remove_nodes_from(removed)
        self.assertEqual(sorted(self.G1.nodes()), sorted(["intel"]))
        for node in removed:
            self.assertRaises(ValueError, self.G1.get_cpds, node)

    def tearDown(self):
        """Drop the fixture models."""
        del self.G
        del self.G1
Exemple #8
0
# Blank template file, read only to get the column names; the results are
# meant to be stored in this layout.
df_result = pd.read_csv("D:\\Satl_project\\correct\\bayesian\\b2_input.csv")

# Input data. The real levels are 1/2/3, but in this file they are stored as
# 0/1/2 (1->0, 2->1, 3->2) because numbering starts at 0 by default.
df = pd.read_csv("D:\\Satl_project\\correct\\bayesian\\b3_input.csv")

# Held-out fold. For five-fold cross validation run this code five times with
# a different row range each time (0-101, 101-201 and so on).
# .copy() makes df_test an independent frame, so adding the label column below
# is not a write into a slice of df (avoids pandas' SettingWithCopyWarning).
df_test = df.iloc[401:501, :].copy()
a = df_test.index
df_train = df.drop(df.index[a])

model_asset.fit(df_train)
model_asset.get_cpds()
model_asset.get_cardinality()
infer_asset = VariableElimination(model_asset)
df_test['Bayesian_label'] = 0
# Renumber the test rows from 0 and discard the old row labels in one step.
df_test = df_test.reset_index(drop=True)

# print df_test
for index, row in df_test.iterrows():
    #print index
    a, b, c = row['Literacy'], row['Formal Employment'], row['Current Status']
    #print a,b,c,d,e
    q_asset = infer_asset.query(['CHH_Change'],
                                evidence={
                                    'Literacy': a,
                                    'Formal Employment': b,
                                    'Current Status': c
Exemple #9
0
def pgmpyToTable(model: BayesianModel,
                 queryNode: Name,
                 grid: Grid,
                 queryNodeLongName: Name = None) -> Table:
    '''
    Build the HTML-like table markup for the CPD of `queryNode`.

    Adapted from `renderTable_fromdict`; here the model itself is passed in
    and the title and header rows are derived from it.

    :param model: model providing `get_cpds(node)` (whose result exposes
        `state_names`) and `get_cardinality(node=...)`.
    :param queryNode: name of the node whose table is rendered.
    :param grid: sequence of rows; every cell is rendered with `str()`.
        An empty grid yields a table with a title and header but no body
        rows. Each row is rendered in full, so the rows are expected to be
        of equal length.
    :param queryNodeLongName: title shown in the first row (e.g. queryNode
        = 'G' but queryNodeLongName = 'Grade'); defaults to `queryNode`.
    :return: the markup string, wrapped in `<<FONT ...>` ... `</FONT>>`.
    '''
    if queryNodeLongName is None:
        queryNodeLongName = queryNode

    cpd = model.get_cpds(queryNode)

    # Every key of state_names after the first one is treated as a
    # conditioning variable and gets its own header column.
    condNodes: List[Name] = list(cpd.state_names.keys())[1:]
    cardinality: int = model.get_cardinality(node=queryNode)

    # The title cell spans the conditioning columns plus one column per state.
    colSpan: int = cardinality + len(condNodes)

    prefix: Table = '<<FONT POINT-SIZE="7"><TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0"><TR><TD COLSPAN="' + \
                    str(colSpan) + '">' + \
                    queryNodeLongName + '</TD></TR>'

    # Header row: conditioning variable names first, then one
    # "<node> = <state>" cell per state (at most `cardinality` of them).
    stateNames: List[State] = cpd.state_names[queryNode]
    headerCells: List[str] = [
        '<TD>' + evidenceVar + '</TD>' for evidenceVar in condNodes
    ]
    headerCells += [
        '<TD>' + queryNode + ' = ' + str(state) + '</TD>'
        for _, state in zip(range(cardinality), stateNames)
    ]
    header: Table = '<TR>' + ''.join(headerCells) + '</TR>'

    # Body: one table row per grid row. A marginal distribution (single row)
    # needs no special case, and an empty grid simply produces no rows.
    body: Table = ''.join(
        '<TR>' + ''.join('<TD>' + str(cell) + '</TD>' for cell in row) +
        '</TR>' for row in grid)

    footer: Table = '</TABLE></FONT>>'

    return prefix + header + body + footer
class MyClass(object):
    def __init__(self, case):
        """Create empty graph, CPD and colour containers for one case.

        :param case: stored on the instance and passed on to ``Utilities``.
        """
        self.case = case
        self.results = []

        # Graph representations: working copies and their "_test" twins.
        self.networx_test = nx.DiGraph()
        self.pgmpy_test = BayesianModel()
        self.networx = nx.DiGraph()
        self.pgmpy = BayesianModel()

        # Book-keeping for the best topology found so far.
        self.best_error = math.inf
        self.best_topology = [0, 0, nx.DiGraph]

        self.dictionary = []
        self.header = {}
        self.nodes_0 = []
        self.edges_0 = {}
        self.nodes = []
        self.edges = {}
        self.cpds = {}

        # Colour tables; filled in by get_my_colors().
        self.colors_dictionary = {}
        self.colors_table = []
        self.colors_cpd = []
        self.learning_data = {}
        # The other methods read and write ``number_of_colors``; it used to be
        # initialised only under a misspelled name, so reading it before
        # get_my_colors() had run raised AttributeError.
        self.number_of_colors = 0
        # Legacy misspelling kept so code that still reads it keeps working.
        self.nummber_of_colors = 0

        self._util = Utilities(case)
        self._lat = Lattices(self._util)

    def get_my_colors(self):
        evidence = []
        cardinality = []
        for i, node in enumerate(self.nodes):
            if 'BEN' in node[0] or 'MEM' in node[0]:
                evidence.append(node[0])
                cardinality.append(node[1]['cardinality'])
        self.colors_dictionary, self.colors_table, self.colors_cpd = self.color_cpd('WORLD',3,evidence,cardinality)
        self.number_of_colors = self.colors_table.shape[1]
        print('Number of colors : ', self.number_of_colors)
        print(self.colors_cpd)
        #print(self.colors_cpd.values)

    def color_cpd(self,var,card_var,evidence,cardinality):
        """Build the hand-written colour CPD and the per-evidence index table.

        :param var: name of the CPD variable.
        :param card_var: cardinality passed to ``TabularCPD`` for ``var``.
        :param evidence: names of the evidence variables.
        :param cardinality: cardinalities of the evidence variables.
        :return: ``(colors, table, cpd)`` where ``table`` comes from
            ``CPD.get_index_matrix(cardinality)``, ``colors`` maps each
            evidence name to the table row at the same position, and ``cpd``
            is the ``TabularCPD``.
        """
        index_table = CPD.get_index_matrix(cardinality)
        n_columns = np.prod(cardinality)

        # NOTE(review): the value matrix always has 3 rows and each
        # hand-written row has 16 entries, regardless of card_var and of the
        # evidence cardinalities.
        values = np.full((3, n_columns), 1. / 3.)
        one_hot_rows = (
            [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
            [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0],
        )
        for row_index, row in enumerate(one_hot_rows):
            values[row_index] = row

        color_cpd = TabularCPD(variable=var, variable_card=card_var,
                               values=values,
                               evidence=evidence,
                               evidence_card=cardinality)

        color_rows = {name: index_table[position]
                      for position, name in enumerate(evidence)}
        return color_rows, index_table, color_cpd


    # def set_color(self, color):
    #     col = self.colors_table[:, color]
    #     for i in range(0,len(col)):
    #         node = 'BENS_'+ str(i)
    #         self.pgmpy.get_cpds(node).values = CPD.RON_cpd(node, self.pgmpy.get_cardinality(node), mu = int(col[i])).values

    def add_edges(self, topology):
        self.networx.remove_edges_from(self.edges)
        self.edges = []
        shape = np.asarray(topology).shape
        # ''' let's first remove all void nodes  ----> not necssary -----> delete the code ??'''
        # nodes_to_remove = []
        # rows = np.sum(topology, axis = 1)
        # columns = np.sum(topology,axis = 0)
        # for row in range(0, len(rows)):
        #     if rows[row] == 0:
        #         nodes_to_remove.append('WORLD_' + str(row))
        # for column in range(0, len(columns)):
        #     if columns[column] == 0:
        #         nodes_to_remove.append('BENS_' + str(column))
        # self.networx.remove_nodes_from(nodes_to_remove)
        self.nodes = self.networx.nodes(data = True)
        for column in range(0,shape[1]):
            for row in range(0,shape[0]):
                if topology[row][column] == 1:
                    parent = 'BENS_' + str(column)
                    child  = 'WORLD_'+ str(row)
                    self.networx.add_edge(parent, child)
        self.edges = self.networx.edges()


    def add_dummy_cpds(self):
        """Attach a placeholder CPD to the attribute dict of every node.

        BEN/MEM nodes get ``CPD.create_fixed_parent(..., modus='uniform')``.
        Any other node gets ``CPD.create_random_child``: in 'orphan' mode
        when it has no incoming edge, otherwise with the cardinalities of
        the source nodes of its incoming edges.
        """
        for position, entry in enumerate(self.nodes):
            name = entry[0]
            own_card = entry[1]['cardinality']

            if ('BEN' in name) or ('MEM' in name):
                placeholder = CPD.create_fixed_parent(own_card, modus = 'uniform')
            else:
                incoming = self.networx.in_edges(name)
                if len(incoming) == 0:
                    placeholder = CPD.create_random_child(own_card, modus = 'orphan')
                else:
                    # The first element of each incoming edge is its source.
                    parent_cards = [self.networx.node[edge[0]]['cardinality']
                                    for edge in incoming]
                    placeholder = CPD.create_random_child(own_card, parent_cards)

            self.nodes[position][1]['cpd'] = placeholder


    def create_learning_data(self):
        self.get_my_colors()
        self.learning_data = {}
        for i, node in enumerate(self.nodes):
            print('node in create learnin data : ', node[0])
            if "BEN" in node[0]:
                self.learning_data.update({node[0]:self.colors_table[i].tolist()})
            if "WORLD" in node[0]:
                shape = self.colors_cpd.values.shape
                reshaped_cpd = self.colors_cpd.values.reshape(shape[0], int(np.prod(shape)/shape[0]))
                for hue in range(0,3):
                    if str(hue) in node[0]:
                        self.learning_data.update({node[0]:reshaped_cpd[hue,:].tolist()})
        print('Learning data')
        print(self.learning_data)


    def do_inference(self, models, expected_result):

        for key in models:
            err = models[key].process()


    def test_topology(self):
        """Run inference on a copy of the current model for every colour.

        For each column of the colour table, every 'BENS_<i>' CPD of the
        copied pgmpy model gets values from ``CPD.create_fixed_parent`` for
        the state in row ``i``; inference is then run with the matching
        column of the flattened colour CPD as the expected result.
        """
        self.networx_test = self.networx.copy()
        self.pgmpy_test   = self.pgmpy.copy()
        model = {'main': GenerativeModel(SensoryInputVirtualPeepo(self), self.pgmpy_test)}

        # Going through all possible colours.
        for color in range(self.number_of_colors):
            states = self.colors_table[:,color]
            cpd_shape = self.colors_cpd.values.shape
            flat_cpd = self.colors_cpd.values.reshape(
                cpd_shape[0], int(np.prod(cpd_shape) / cpd_shape[0]))
            expected_result = flat_cpd[:,int(color)]

            for position, pixel in enumerate(states):
                ben_name = 'BENS_' + str(position)
                cardinality = self.pgmpy_test.get_cardinality(ben_name)
                self.pgmpy_test.get_cpds(ben_name).values = \
                    CPD.create_fixed_parent(cardinality, state = int(pixel))

            self.do_inference(model, expected_result)


    def estimate_parameters(self):
        """Overwrite the CPD values of LAN/MOTOR/WORLD nodes with estimates.

        Builds a DataFrame from ``self.learning_data``, fits a
        ``BayesianEstimator`` on ``self.pgmpy`` and, for every node whose
        name contains 'LAN', 'MOTOR' or 'WORLD', replaces the values of that
        node's CPD with the values estimated for that same node.
        """
        data = pd.DataFrame(data=self.learning_data)
        estimator = BayesianEstimator(self.pgmpy, data)
        learnable_tags = ('LAN', 'MOTOR', 'WORLD')
        for node in self.nodes:
            name = node[0]
            if not any(tag in name for tag in learnable_tags):
                continue
            # Estimate the CPD of the node being updated. This used to be
            # hard-coded to 'WORLD_0', so every node received WORLD_0's
            # table regardless of its own parents.
            # NOTE(review): pseudo_counts=[2, 3] is kept from the original;
            # confirm it suits every node this is applied to.
            estimated = estimator.estimate_cpd(name, prior_type='dirichlet', pseudo_counts=[2, 3])
            self.pgmpy.get_cpds(name).values = estimated.values
                # print('cpd for ', node[0])
                # print(self.pgmpy.get_cpds(node[0]))




    def do_it(self):
        '''EXPLANATIONS'''
        self.networx_test,  self.dictionary, self.header = self._util.get_network()
        self.networx = self.networx_test.copy()
        self.nodes = self.networx.nodes(data=True)
        self.create_learning_data()
        print('incoming panda data')
        print(self.learning_data)
        print('Dictionary : ', self.dictionary)

        ''' -------------- Constructing all possible topologies, 
                              --> option : restrain the number with the treshold : 
                                        0 -> all possible topologies, 100 -> only the fully connnected topology'''
        possible_topologies  = self._lat.get_possible_topologies(treshold = 50)#setting the entropy at a 50% -> only topologies with an entropy >= 0.5 will be considered
        print("Possible topologies : ", len(possible_topologies))
        entropy = 0
        count = 0#TEMPORARY
        ''' -------------- walking through all toplogies'''
        for topology in possible_topologies:
            entropy = topology[1]
            if entropy == 0:
                continue#safeguard
            topo  = topology[0]
            #self.networx = self.networx_0.copy()
            edges = []
            parent = ''
            child = ''

            ''' ----------- for each topology we construct the edges and update dummy cpd (necessary as the shape of the LENs cpd's can change
                            depending on the number of incoming nodes'''
            self.add_edges(topo)
            self.add_dummy_cpds()
            ''' ----------- convert DiGraph to pgmpy and check'''
            self.pgmpy = self._util.translate_digraph_to_pgmpy(self.networx)
            self.pgmpy.check_model()

            '''------------ ask pgmpy to guess the best cpd's of the LANs and LENs 
                             -> provide pgmpy with the learning data'''
            self.estimate_parameters()


            '''-------------- Testing the constructed topology'''
            self.test_topology()

            '''following  4 lines to remove : just use to check whether the algorithms are correct regarding the edges building'''
            count += 1
            #print('edges : ', self.edges)
            if count > 10:
                break
        print('Check -> number of processed topologies in loop : ', count)
        # print('My colors : ')
        # print(self.colors_table)
        # print(self.colors_cpd)
        '''TO DO ----------------------------------------------------
                a) add random cpds , convert to pgmpy BN, 
                b) enbedd the skeleton loop  within the learning loop->
                    loop through all possible colors and the expected classification
                    -- > for each skeleton with the possible color as BEN, make  pgmpy guess the best cpd's 
                         with the method class 
                                   in pgmpy.estimators.BayesianEstimator.BayesianEstimator(model, data, **kwargs)[source]
                                            estimate_cpd(node, prior_type='BDeu', pseudo_counts=[], equivalent_sample_size=5)[source]
                    -- > make inference and calulate the 'error (to be determined)
                    ---> log the error as a tuple (error, 'entropy of the skeleton')
                c) create output (grapgh?)
                    
            
            
            '''



        '''  the methods have to be completed to cope with a general case i.e. BENS,MEMS,LANS, MOTORs, WORLDs
        but for the moment being we just assume there are only BEN's and WORLD's'''

        # self.networx.add_edge('BENS_1','WORLD_1')
        # self.networx.node['BENS_1']['cpd'] = [0.8,0.2]
        # self.networx.node['WORLD_2']['cpd'] = [[0.8, 0.2, 0.5,0.3],[0.2,0.8,0.5,0.7]]
        ''' if a best model has ben found, save it -> first update the Utility class object and save it'''
        # self._util.update_networkx(self.networx, self.dictionary, self.header)
        # self._util.save_network()
        # self._util.update_pgmpy(self.pgmpy, self.dictionary, self.header)
        # self._util.save_pgmpy_network()
        self.draw()
        return self.results


    def draw(self):
        """Show ``self.networx`` in a circular layout (to be removed later)."""
        plt.figure(figsize=(10, 5))
        layout = nx.circular_layout(self.networx, scale=2)
        draw_options = dict(node_size=1200, node_color='lightblue',
                            linewidths=0.25, font_size=10,
                            font_weight='bold', with_labels=True)
        nx.draw(self.networx, layout, **draw_options)
        plt.show()