Exemple #1
0
def make_model_adult(data):
    """Return the fixed Bayesian network structure used for the Adult data.

    Parameters
    ----------
    data
        Not read by this function; the structure is hard-coded.

    Returns
    -------
    BayesianModel
        Model holding only the directed edges below (no CPDs are attached).
    """
    # Parent and child of each edge, listed pairwise in insertion order.
    parents = ('education.num', 'education.num', 'capital.loss',
               'capital.gain', 'capital.loss', 'capital.loss',
               'marital.status', 'Gender', 'race')
    children = ('Income', 'capital.loss', 'Income',
                'Income', 'age', 'marital.status',
                'age', 'Income', 'native.country')
    return BayesianModel(list(zip(parents, children)))
Exemple #2
0
def create_bayes_net(file, keep_atts, edges):
    """Build a Bayesian network over selected CSV columns and fit its CPDs.

    Parameters
    ----------
    file
        Anything ``pd.read_csv`` accepts.
    keep_atts
        Column selection applied to the loaded frame; every kept column
        becomes a node.
    edges
        Directed edges handed to ``add_edges_from``.

    Returns
    -------
    BayesianModel
        The model after ``fit`` has been called on the selected columns.
    """
    frame = pd.read_csv(file)[keep_atts]

    net = BayesianModel()
    net.add_nodes_from(frame.columns)
    net.add_edges_from(edges)

    # Estimate the CPD tables for the structure defined above.
    net.fit(frame)
    return net
Exemple #3
0
def bayeSian(k):
    # Predict a class label with a Bayesian network, then estimate a duration
    # by averaging column 7 of the k nearest training rows of that class.
    # Prints the accuracy, MSE and MAPE; nothing is returned.
    #
    # NOTE(review): this is Python 2 code (print statements), and every empty
    # subscript (`[]`) and `columns_name` placeholder was stripped from this
    # listing -- the function does not parse until they are filled in.
    fileName = '文件名';
    dataMat, dataLab = file2matrix(fileName, 9);
    durAct = dataMat[];
    testMat = dataMat[];
    count = 0;
#     testMat = dataMat[];
    testLab = np.array(dataLab[]);
    trainFraK = pd.DataFrame(dataMat,columns=[columns_name]);
    trainFra = trainFraK.ix[];
#     data_cla0 = trainFraK[trainFraK['T_TYPE']==0].values;
#     data_cla1 = trainFraK[trainFraK['T_TYPE']==1].values;
    trainInput = trainFraK[[columns_name]];
    # Copy the selected columns row by row into an int64 array.
    trainArr = np.zeros((dataMat.shape[0], 4), dtype='int64');
    for arr in trainInput.values:
        trainArr[count, :]= map(int, arr);
        count += 1;
    trainInput = pd.DataFrame(trainArr, columns=[columns_name]);
    # The test frame is a copy without the 'T_TYPE' column, which is predicted.
    test = trainInput[];
    test = test.copy();
    test.drop('T_TYPE', axis=1, inplace=True);
    model = BayesianModel([('columns_name','columns_name'),('columns_name', 'columns_name'), ('columns_name', 'columns_name')]);
    model.fit(trainInput.ix[]);
    labelPre = model.predict(test);
    durPre = [];
    # Weight of the position distance; with 0.0 only the time distance counts.
    coef = 0.0;
    for i in range(len(testMat)):
        # Candidate neighbours: training rows whose 'T_TYPE' equals the
        # predicted label.
        dataSet = trainFra[trainFra['T_TYPE']==labelPre['T_TYPE'][]].values;
        distPos = np.zeros(dataSet.shape[0]);
        distTim = np.zeros(dataSet.shape[0]);
        for j in range(dataSet.shape[0]):
            distPos[j] = distSLC(testMat[i], dataSet[j]);
            distTim[j] = disTim(testMat[i], dataSet[j]);
        # Only the time distance is passed through dataNorm.
        distPosNor = distPos;#dataNorm(distPos);
        distTimNor = dataNorm(distTim);
        distAll = distPosNor*coef + distTimNor*(1-coef);
        knnIndex = distAll.argsort();
        # Mean of column 7 over the k closest candidates.
        durKnn = dataSet[knnIndex, 7][:k];
        durPre.append(sum(durKnn)/len(durKnn));
    mse = calMse(durPre, durAct);
    mape = calMape(durPre, durAct);
    # Count predicted labels that match the reference labels.
    count = 0;
    #print labelPre.values.tolist();
    for i in range(len(labelPre)):
        if labelPre.values[i]==testLab[i]:
            count += 1;
    print 'K: ', k;
    print '准确度: ', float(count)/len(testLab);
    print 'MSE: ', mse;
    print 'MAPE: ', mape;
    print '----------------------------------------------------------------------';
Exemple #4
0
def make_DAG(DAG, CPD=None, checkmodel=True, verbose=3):
    """Create a Directed Acyclic Graph from an edge list or an existing model.

    Parameters
    ----------
    DAG : list, dict or pgmpy model
        Edge list in the form [('A','B'), ('B','C')], an existing pgmpy
        model (used unchanged), or a dict whose 'model' entry is one of those.
    CPD : list, array-like
        Containing TabularCPD for each node. A single CPD is wrapped in a list.
    checkmodel : bool
        Check the validity of the model after adding CPDs. The default is True.
        The check only runs when CPDs are supplied.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: None, 1: ERROR, 2: WARN, 3: INFO (default), 4: DEBUG, 5: TRACE

    Raises
    ------
    Exception
        When DAG is neither a list nor a pgmpy object.

    Returns
    -------
    dict
        'model' holds the pgmpy model and 'adjmat' the adjacency matrix
        produced by ``_dag2adjmat`` for that model.

    """
    if (CPD is not None) and (not isinstance(CPD, list)):
        CPD = [CPD]
    if isinstance(DAG, dict):
        DAG = DAG.get('model', None)

    # pgmpy objects are recognised by their type name.
    is_pgmpy = 'pgmpy' in str(type(DAG))
    if (not isinstance(DAG, list)) and (not is_pgmpy):
        raise Exception("[bnlearn] >Error: Input DAG should be a list. in the form [('A','B'), ('B','C')] or a <pgmpy.models.BayesianModel.BayesianModel>")
    elif is_pgmpy:
        if verbose >= 3:
            print('[bnlearn] >No changes made to existing Bayesian DAG.')
    else:
        if verbose >= 3:
            print('[bnlearn] >Bayesian DAG created.')
        DAG = BayesianModel(DAG)

    if CPD is not None:
        for cpd in CPD:
            DAG.add_cpds(cpd)
            if verbose >= 3:
                print('[bnlearn] >Add CPD: %s' % (cpd.variable))

        if checkmodel:
            # Always run the validation; only the report obeys `verbose`, so
            # verbose=0 is silent like every other message in this function.
            model_ok = DAG.check_model()
            if verbose >= 3:
                print('[bnlearn] >Model correct: %s' % (model_ok))

    # Create adjacency matrix from DAG
    out = {}
    out['adjmat'] = _dag2adjmat(DAG)
    out['model'] = DAG
    return out
Exemple #5
0
def generic_model(coin_set):
    """Build a GenerativeModel over a fixed three-parent Bayesian network.

    The network has binary roots A, B and C that all point at the binary
    node D.

    Parameters
    ----------
    coin_set
        Passed straight to ``SensoryInputCoin``.

    Returns
    -------
    GenerativeModel
        Built from ``SensoryInputCoin(coin_set)`` and the network.
    """
    network = BayesianModel([('A', 'D'), ('B', 'D'), ('C', 'D')])

    # Root CPDs are written as (variable_card, 1) columns: one row per state.
    # A single 1x2 row holds the same numbers but does not match the shape
    # TabularCPD expects for a variable without evidence.
    cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.9], [0.1]])
    cpd_b = TabularCPD(variable='B', variable_card=2, values=[[0.1], [0.9]])
    cpd_c = TabularCPD(variable='C', variable_card=2, values=[[0.1], [0.9]])
    cpd_d = TabularCPD(variable='D', variable_card=2,
                       values=[[0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.9, 0.9],
                               [0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.1, 0.1]],
                       evidence=['A', 'B', 'C'], evidence_card=[2, 2, 2])

    network.add_cpds(cpd_a, cpd_b, cpd_c, cpd_d)
    network.check_model()

    return GenerativeModel(SensoryInputCoin(coin_set), network)
Exemple #6
0
    def get_model(self):
        """
        Build a BayesianModel from the parsed ``self.probnet`` dictionary.

        The model gets its edges from ``probnet['edges']``, one TabularCPD per
        entry of ``probnet['Potentials']``, and the variable / edge properties
        copied onto the graph's node and edge attribute dictionaries.

        Return
        ---------------
        model: an instance of BayesianModel.

        Raises
        ------
        ValueError
            If ``probnet['type']`` is not "BayesianNetwork".

        Examples
        -------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        probnet = self.probnet
        if probnet.get('type') != "BayesianNetwork":
            raise ValueError("Please specify only Bayesian Network.")

        model = BayesianModel(probnet['edges'].keys())

        def to_tabular(potential):
            # The first key of the potential's variables is the child; its
            # value lists the evidence variables.
            child = list(potential['Variables'].keys())[0]
            n_states = len(probnet['Variables'][child]['States'])
            parents = potential['Variables'][child]
            parent_cards = [
                len(probnet['Variables'][parent]['States'])
                for parent in parents
            ]
            table = np.array(
                [float(token) for token in potential['Values'].split()])
            table = table.reshape((n_states, table.size // n_states))
            return TabularCPD(child, n_states, table, parents, parent_cards)

        model.add_cpds(
            *[to_tabular(potential) for potential in probnet['Potentials']])

        # Copy the per-variable and per-edge properties onto the graph.
        for var in model.nodes():
            model.node[var].update(probnet['Variables'][var])

        for source, target in model.edges():
            model.edge[source][target].update(
                probnet['edges'][(source, target)])

        return model
def pgmpy_test2():
    """Fit a five-variable network on random binary data and print its CPDs.

    Based on the pgmpy example notebook
    https://github.com/pgmpy/pgmpy/blob/dev/examples/Learning%20from%20data.ipynb
    """
    # 1000 random samples; every variable has 2 states with equal probability.
    samples = pd.DataFrame(
        np.random.randint(low=0, high=2, size=(1000, 5)),
        columns=['D', 'I', 'G', 'L', 'S'],
    )

    structure = [('D', 'G'), ('I', 'G'), ('I', 'S'), ('G', 'L')]
    network = BayesianModel(structure)

    # Learn the CPDs with the Maximum Likelihood Estimator.
    network.fit(samples, estimator=MaximumLikelihoodEstimator)
    for learned in network.get_cpds():
        print("CPD of {variable}:".format(variable=learned.variable))
        print(learned)
def make_model_adult(data):
    """Return the fixed Bayesian network structure for the encoded Adult data.

    Parameters
    ----------
    data
        Not read by this function; the structure is hard-coded.

    Returns
    -------
    BayesianModel
        Model holding only the directed edges below (no CPDs are attached).
    """
    # Flat listing: parent, child, parent, child, ...
    flat = [
        'Gender', 'Income',
        'Gender', 'marital.status_0',
        'marital.status_0', 'Income',
        'Gender', 'workclass_1',
        'education.num', 'Income',
        'age', 'marital.status_1',
        'capital.gain', 'Income',
        'capital.gain', 'hours.per.week',
        'capital.loss', 'Income',
        'age', 'workclass_0',
        'age', 'hours.per.week',
        'native.country_1', 'education.num',
        'native.country_0', 'education.num',
    ]
    structure = list(zip(flat[0::2], flat[1::2]))
    return BayesianModel(structure)
 def __init__(self, case):
     """Initialise empty graphs, lookup containers and helpers for ``case``."""
     self.case = case
     self.results = []

     # Each graph is kept twice: as a networkx DiGraph and as a pgmpy
     # BayesianModel, once for the "test" variant and once for the main one.
     self.networx_test, self.pgmpy_test = nx.DiGraph(), BayesianModel()
     self.networx, self.pgmpy = nx.DiGraph(), BayesianModel()

     # Best result so far. The third slot holds the nx.DiGraph class itself,
     # not an instance.
     self.best_error = math.inf
     self.best_topology = [0, 0, nx.DiGraph]

     # Containers that start out empty.
     self.dictionary, self.header = [], {}
     self.nodes_0, self.edges_0 = [], {}
     self.nodes, self.edges = [], {}
     self.cpds = {}
     self.colors_dictionary = {}
     self.colors_table, self.colors_cpd = [], []
     self.learning_data = {}
     self.nummber_of_colors = 0  # attribute name kept as spelled

     # Helper objects; Lattices is built on top of the Utilities instance.
     self._util = Utilities(case)
     self._lat = Lattices(self._util)
Exemple #10
0
    def __init__(self, current_loc):
        """Load the objects of one location and build the object network.

        Parameters
        ----------
        current_loc : str
            Base name of the CSV file; ``current_loc + '.csv'`` is read from
            ``current_folder`` via ``self.read_csv``.

        The network links every entry of ``men_objs`` and ``table_objs`` to
        the 'table' node; ``self.build_cpds()`` is called afterwards.
        """
        self.anchor_objs = ['table', 'man']
        self.men_objs = ['hand', 'head', 'hat', 'bed']
        self.table_objs = ['laptop', 'banana', 'book', 'chair', 'paper']
        self.current_loc_objs = self.read_csv(current_folder /
                                              (current_loc + '.csv'))

        # One arc per object, all pointing at 'table'. Built as a concrete
        # list so the edges can be iterated more than once.
        all_arcs = [(obj, 'table')
                    for obj in self.men_objs + self.table_objs]
        self.model = BayesianModel(all_arcs)
        self.build_cpds()
Exemple #11
0
def createBayesianNetwork():
    """Build the eight-node network with all-binary variables and its CPDs.

    Returns
    -------
    BayesianModel
        The model after all CPDs were attached and ``check_model`` was called.
    """
    def binary_cpd(name, table, parents=None):
        # Every variable and every parent in this network has two states.
        if parents is None:
            return TabularCPD(variable=name, variable_card=2, values=table)
        return TabularCPD(variable=name,
                          variable_card=2,
                          values=table,
                          evidence=parents,
                          evidence_card=[2] * len(parents))

    # Network structure.
    structure = [
        ('asia', 'tub'), ('smoke', 'bronc'),
        ('smoke', 'lung'), ('lung', 'either'),
        ('tub', 'either'), ('bronc', 'dysp'),
        ('either', 'xray'), ('either', 'dysp'),
    ]
    model = BayesianModel(structure)

    # Parameters, listed in the order they are attached to the network.
    cpds = [
        binary_cpd('asia', [[0.01], [0.99]]),
        binary_cpd('smoke', [[0.5], [0.5]]),
        binary_cpd('tub', [[0.05, 0.99], [0.95, 0.01]], ['asia']),
        binary_cpd('lung', [[0.1, 0.99], [0.9, 0.01]], ['smoke']),
        binary_cpd('bronc', [[0.6, 0.7], [0.4, 0.3]], ['smoke']),
        binary_cpd('either',
                   [[1.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]],
                   ['lung', 'tub']),
        binary_cpd('xray', [[0.98, 0.95], [0.02, 0.05]], ['either']),
        binary_cpd('dysp',
                   [[0.9, 0.7, 0.8, 0.1], [0.1, 0.3, 0.2, 0.9]],
                   ['bronc', 'either']),
    ]
    model.add_cpds(*cpds)
    model.check_model()
    return model
Exemple #12
0
def create_model_and_inference():
    """Build a Bayesian network from 'dependencies.csv' and fit it on random data.

    The dependency table is walked recursively starting at
    'myproximus-usage'; the collected edges are reversed, added to a
    BayesianModel (edges that pgmpy rejects are skipped with a warning), and
    the model is fitted on randomly generated binary samples with a BDeu
    prior. The network is also drawn via ``draw_network``.

    Returns
    -------
    tuple
        ``(model, VariableElimination(model))``.
    """
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        # Follow the dependency columns (from the 4th column on) of `source`
        # and recurse into every target not already linked back to it.
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                if (target, source) not in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    # Flip the direction of every collected edge.
    edges = [(target, source) for source, target in edges]

    nodes = set(itertools.chain.from_iterable(edges))
    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]

    # Eleven random binary observations per node, stored as columns '0'..'10'.
    # NOTE(review): each column is a 64-row frame aligned on the index, so
    # rows whose index label lies outside 0..63 get NaN -- confirm the CSV
    # has at most 64 rows.
    for sample_no in range(11):
        nodes_df[str(sample_no)] = pd.DataFrame(
            data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            # pgmpy raises ValueError for edges that would create a loop;
            # anything else is a real error and must not be swallowed.
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")

    draw_network(model.nodes(), model.edges(), {}, [])

    return model, VariableElimination(model)
Exemple #13
0
 def setUp(self):
     """Create the shared model, two data frames and three estimators."""
     self.m1 = BayesianModel([("A", "C"), ("B", "C")])

     # Small all-integer data set.
     small = {"A": [0, 0, 1], "B": [0, 1, 0], "C": [1, 1, 0]}
     self.d1 = pd.DataFrame(data=small)

     # Larger data set where B holds string states.
     larger = {
         "A": [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
         "B": ["X", "Y"] * 5,
         "C": [1, 1, 1] + [0] * 7,
     }
     self.d2 = pd.DataFrame(data=larger)

     # est2 uses explicit state names that include states absent from d1.
     explicit_states = {"A": [0, 1, 2], "B": [0, 1], "C": [0, 1, 23]}
     self.est1 = BayesianEstimator(self.m1, self.d1)
     self.est2 = BayesianEstimator(
         self.m1, self.d1, state_names=explicit_states)
     self.est3 = BayesianEstimator(self.m1, self.d2)
 def test_state_names2(self):
     """The estimated CPD of 'Color' equals the hand-written expectation."""
     network = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
     observations = pd.DataFrame(data={
         'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
         'Light?': [True, True, False, False, True],
         'Color': ['red', 'green', 'black', 'black', 'yellow'],
     })

     expected_table = [
         [1, 0, 1, 0],
         [0, 0.5, 0, 0],
         [0, 0.5, 0, 0],
         [0, 0, 0, 1],
     ]
     expected = TabularCPD('Color', 4, expected_table,
                           evidence=['Fruit', 'Light?'],
                           evidence_card=[2, 2])

     estimator = MaximumLikelihoodEstimator(network, observations)
     self.assertEqual(estimator.estimate_cpd('Color'), expected)
    def __init__(self, parameters=None):
        """Build the pgmpy network and its CPDs from ``self.parameters``.

        Reads ``self.parameters['network']`` for the structure and
        ``self.parameters['conditional_probabilities']`` for the CPD values.
        """
        super().__init__(parameters)

        # set up the network based on the parameters
        self.pgm = BayesianModel(self.parameters['network'])

        # TODO -- add 'evidence' -- get from network?
        # NOTE(review): each entry is unpacked as (node_id, values) -- confirm
        # 'conditional_probabilities' yields pairs, not bare keys.
        cpds = [TabularCPD(variable=node_id,
                           variable_card=len(values),
                           values=values,
                           evidence=[]) for node_id, values in
                self.parameters['conditional_probabilities']]
        # add_cpds takes the CPDs as separate positional arguments, so the
        # collection has to be unpacked.
        self.pgm.add_cpds(*cpds)
 def test_state_names1(self):
     """The estimated CPD of 'B' given 'A' equals the hand-written one."""
     network = BayesianModel([("A", "B")])
     observations = pd.DataFrame(
         data={"A": [2, 3, 8, 8, 8], "B": ["X", "O", "X", "O", "X"]})

     expected_table = [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]]
     expected = TabularCPD("B", 2, expected_table,
                           evidence=["A"], evidence_card=[3])

     estimator = MaximumLikelihoodEstimator(network, observations)
     self.assertEqual(estimator.estimate_cpd("B"), expected)
Exemple #17
0
def fully_connected_model(nodes=None):
    """Connect every 'hypo' node to every 'obs'/'motor' node and fit the net.

    Parameters
    ----------
    nodes : optional
        Node names. When empty or None the default five nodes are used.

    Returns
    -------
    BayesianModel
        Fitted on ``TRAINING_DATA`` with a BayesianEstimator and BDeu prior.
    """
    if not nodes:
        nodes = [BOREDOM, DESIRE, MOBILE, MOTOR_HYPO, LEFT_ARM]

    network = BayesianModel()
    network.add_nodes_from(nodes)

    # Roles are decided by substrings of the node names.
    sources = [name for name in nodes if 'hypo' in name]
    targets = [name for name in nodes if 'obs' in name or 'motor' in name]
    for source in sources:
        for target in targets:
            network.add_edge(u=source, v=target)

    network.fit(TRAINING_DATA, estimator=BayesianEstimator, prior_type="BDeu")
    return network
 def setUp(self):
     """Create a four-node model, a complete data set and one with NaNs."""
     structure = [("A", "C"), ("B", "C"), ("D", "B")]
     self.m1 = BayesianModel(structure)

     complete = {
         "A": [0, 0, 1],
         "B": [0, 1, 0],
         "C": [1, 1, 0],
         "D": ["X", "Y", "Z"],
     }
     # Same layout, with some entries replaced by NaN.
     with_gaps = {
         "A": [0, NaN, 1],
         "B": [0, 1, 0],
         "C": [1, 1, NaN],
         "D": [NaN, "Y", NaN],
     }
     self.d1 = DataFrame(data=complete)
     self.d2 = DataFrame(data=with_gaps)
Exemple #19
0
 def bayes_net_from_populational_data(data, independent_vars,
                                      dependent_vars):
     """Build a network where every independent variable points at every
     dependent variable, with CPDs computed from ``data``.

     ``independent_vars`` and ``dependent_vars`` are combined with ``|``.
     State names and CPDs come from the ``BayesNetHelper`` helpers.

     :return: the BayesianModel with one CPD per variable attached
     """
     model = BayesianModel()
     model.add_nodes_from(independent_vars)
     for cause in independent_vars:
         for effect in dependent_vars:
             model.add_edge(cause, effect)

     every_var = independent_vars | dependent_vars
     state_names = BayesNetHelper.get_state_names_from_df(data, every_var)
     model.add_cpds(*[
         BayesNetHelper.compute_cpd(model, node, data, state_names)
         for node in every_var
     ])
     return model
Exemple #20
0
 def setUp(self):
     """Create a four-node model, a complete data set and one with NaNs."""
     self.m1 = BayesianModel([('A', 'C'), ('B', 'C'), ('D', 'B')])

     # Fully observed samples.
     self.d1 = DataFrame(data=dict([
         ('A', [0, 0, 1]),
         ('B', [0, 1, 0]),
         ('C', [1, 1, 0]),
         ('D', ['X', 'Y', 'Z']),
     ]))

     # Same columns with some entries replaced by NaN.
     self.d2 = DataFrame(data=dict([
         ('A', [0, NaN, 1]),
         ('B', [0, 1, 0]),
         ('C', [1, 1, NaN]),
         ('D', [NaN, 'Y', NaN]),
     ]))
Exemple #21
0
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build the label Bayesian network, or load a pickled one.

    Parameters
    ----------
    load : bool
        When true and ``graph.p`` exists in ``RUNNING_MODEL_DIR``, the
        pickled graph is loaded and checked instead of being rebuilt.
    subtree : bool
        Use the labels from ``_subtree_labels()`` instead of every key of the
        loaded label dictionary.
    modelsdir
        Directory handed to ``get_model_cpds``.

    Returns
    -------
    BayesianModel
        The checked model. A freshly built model is also pickled to
        ``graph.p``.
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'
    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE(security): unpickling runs arbitrary code; only load graph
        # files this program wrote itself.
        with open(graph_file, 'rb') as handle:
            G = pickle.load(handle)
        G.check_model()
        return G

    print('loading data...')
    training_labels, go_dict = load_label_data()
    if subtree:
        labels_list = _subtree_labels()
        print(labels_list)
    else:
        labels_list = go_dict.keys()

    print('adding nodes and edges...')
    G = BayesianModel()
    # Every label gets a '<label>_hat' child node.
    G.add_edges_from([(label, label + '_hat') for label in labels_list])
    obo_graph = obonet.read_obo(OBODB_FILE)
    known_labels = set(labels_list)  # constant-time membership tests
    for label in labels_list:
        # Link every known label among the ontology-graph ancestors of
        # `label` to it.
        children = [
            c for c in networkx.ancestors(obo_graph, label)
            if c in known_labels
        ]
        for child in children:
            G.add_edge(child, label)

    # Honour the caller's directory instead of always using the default.
    predicted_cpds = get_model_cpds(labels_list=labels_list,
                                    modelsdir=modelsdir)
    for cpd in predicted_cpds:
        G.add_cpds(cpd)
    true_label_cpds = get_true_label_cpds(training_labels,
                                          go_dict,
                                          labels_list=labels_list)
    for cpd in true_label_cpds:
        G.add_cpds(cpd)

    # Drop nodes that ended up without a CPD so check_model can pass.
    remove_list = [node for node in G.nodes() if G.get_cpds(node) is None]
    for node in remove_list:
        if node in G:
            G.remove_node(node)
    G.check_model()
    with open(graph_file, 'wb') as handle:
        pickle.dump(G, handle)
    return G
Exemple #22
0
 def __init__(self, pnh, gh):
     """Collect settings and data from the two handler objects.

     :param pnh: handler queried for the data extractor, device, priority,
         dataframe, file writer and file suffix
     :param gh: stored unchanged as ``general_handler``
     """
     extractor = pnh.get_data_extractor()

     # Empty models and placeholders, filled in later.
     self.best_model, self.training_instances = BayesianModel(), ""

     self.device_considered = pnh.get_device()
     self.priority_considered = pnh.get_priority()
     self.markov, self.general_handler = MarkovModel(), gh

     # Values provided by the data extractor.
     self.variables_names = extractor.get_variable_names()
     self.rankedDevices = extractor.get_ranked_devices()

     # Data and output settings provided by the handler.
     self.data = pnh.get_dataframe()
     self.file_writer = pnh.get_file_writer()
     self.file_suffix = pnh.get_file_suffix()
Exemple #23
0
def create_bayes_net():
    """Build and fit the attribute network from the CelebA attribute CSV.

    Only the ``KEEP_ATTS`` columns of '../../data/list_attr_celeba.csv' are
    used; each one becomes a node.

    Returns
    -------
    BayesianModel
        The model after ``fit`` has been called on the selected columns.
    """
    attributes = pd.read_csv('../../data/list_attr_celeba.csv')[KEEP_ATTS]

    structure = [
        ('Young', 'Eyeglasses'),
        ('Young', 'Bald'),
        ('Young', 'Mustache'),
        ('Male', 'Mustache'),
        ('Male', 'Smiling'),
        ('Male', 'Wearing_Lipstick'),
        ('Young', 'Mouth_Slightly_Open'),
        ('Young', 'Narrow_Eyes'),
        ('Male', 'Narrow_Eyes'),
        ('Smiling', 'Narrow_Eyes'),
        ('Smiling', 'Mouth_Slightly_Open'),
        ('Young', 'Smiling'),
    ]

    net = BayesianModel()
    net.add_nodes_from(attributes.columns)
    net.add_edges_from(structure)
    net.fit(attributes)
    return net
    def test_get_cpds1(self):
        """get_cpds returns the CPD of a node, raises ValueError for an
        unknown node, and returns None for a node that has no CPD."""
        self.model = BayesianModel([('A', 'AB')])
        self.model.add_cpds(
            TabularCPD('A', 2, values=np.random.rand(2, 1)),
            TabularCPD('AB', 2, values=np.random.rand(2, 2),
                       evidence=['A'], evidence_card=[2]),
        )

        for name in ('A', 'AB'):
            self.assertEqual(self.model.get_cpds(name).variable, name)

        # 'B' is not part of the graph yet.
        with self.assertRaises(ValueError):
            self.model.get_cpds('B')

        # Once added as a bare node it simply has no CPD.
        self.model.add_node('B')
        self.assertIsNone(self.model.get_cpds('B'))
Exemple #25
0
 def make_bayes_model(self):
     """
     Build the Bayesian network from ``self.tree`` and the stored probabilities.

     :return: the Bayesian network model built from the historical information
     """
     model = BayesianModel(self.tree)
     # Add each child node's CPD to the Bayesian network.
     # NOTE(review): entry[1] is converted to float and passed first, entry[0]
     # second -- confirm against node_pro's signature.
     for entry in self.pro:
         model.add_cpds(self.node_pro(float(entry[1]), entry[0]))

     # Root node. The CPD is a (variable_card, 1) column, one row per state;
     # a single 1x2 row holds the same numbers in the wrong shape.
     root_probability = float(self.root_pro)
     root_cpd = TabularCPD(
         variable=self.root_code,
         variable_card=2,
         values=[[1 - root_probability], [root_probability]])
     model.add_cpds(root_cpd)  # add the root node to the Bayesian network
     return model
def model():
    """
    Build the five-node Bayesian model and attach the CPDs from ``factors()``.

    Returns the BayesianModel.
    """
    cpd_by_name = factors()

    # A pgmpy model is defined by its list of directed edges.
    network = BayesianModel(
        [('a', 'b'), ('a', 'e'), ('b', 'c'), ('c', 'd'), ('e', 'd')])

    # Attach every factor, in the iteration order of the container.
    network.add_cpds(*[cpd_by_name[name] for name in cpd_by_name])
    return network
Exemple #27
0
def generate_approx_model_from_graph(ebunch, nodes, df):
	"""
	Learn a pgmpy Bayesian model from the data of a pandas dataframe.
	The rows are shuffled first. Every variable named in ``ebunch`` or
	``nodes`` is declared with the states [0, 1].
	"""
	shuffled = df.sample(frac=1)
	approx_model = BayesianModel(ebunch)
	approx_model.add_nodes_from(nodes)

	# Binary state names for both endpoints of every edge, then for the
	# explicitly listed nodes.
	state_names = {
		endpoint: [0, 1]
		for pair in ebunch
		for endpoint in (pair[0], pair[1])
	}
	state_names.update({node: [0, 1] for node in nodes})

	approx_model.fit(shuffled, state_names=state_names, estimator=SmoothedMaximumLikelihoodEstimator)
	return approx_model
    def test_get_cpds1(self):
        """get_cpds returns the CPD of a node, raises ValueError for an
        unknown node, and returns None for a node that has no CPD."""
        self.model = BayesianModel([("A", "AB")])

        root_cpd = TabularCPD("A", 2, values=np.random.rand(2, 1))
        child_cpd = TabularCPD(
            "AB", 2, values=np.random.rand(2, 2),
            evidence=["A"], evidence_card=[2])
        self.model.add_cpds(root_cpd, child_cpd)

        for name in ("A", "AB"):
            self.assertEqual(self.model.get_cpds(name).variable, name)

        # "B" is not part of the graph yet.
        with self.assertRaises(ValueError):
            self.model.get_cpds("B")

        # Once added as a bare node it simply has no CPD.
        self.model.add_node("B")
        self.assertIsNone(self.model.get_cpds("B"))
Exemple #29
0
    def get_model(self):
        """
        Build a Bayesian Model or Markov Model from the parsed UAI data.
        Variables follow the pattern var_0, var_1, var_2 where var_0 is the
        0th index variable, var_1 is the 1st index variable.

        Return
        ------
        model: a BayesianModel when ``network_type`` is 'BAYES', a
            MarkovModel when it is 'MARKOV'; None for any other value.

        Examples
        --------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_model()
        """
        kind = self.network_type

        if kind == 'BAYES':
            model = BayesianModel()
            model.add_nodes_from(self.variables)
            model.add_edges_from(self.edges)

            def as_cpd(entry):
                # entry[0] is the child variable, entry[1] its flat values.
                child = entry[0]
                card = int(self.domain[child])
                flat = np.array([float(value) for value in entry[1]])
                table = flat.reshape(card, flat.size // card)
                return TabularCPD(child, card, table)

            model.add_cpds(*[as_cpd(entry) for entry in self.tables])
            return model

        if kind == 'MARKOV':
            model = MarkovModel(self.edges)
            # entry[0] lists the factor's variables, entry[1] its values.
            model.add_factors(*[
                DiscreteFactor(
                    variables=entry[0],
                    cardinality=[int(self.domain[var]) for var in entry[0]],
                    values=list(map(float, entry[1])))
                for entry in self.tables
            ])
            return model
Exemple #30
0
def kNN(k):
    # For every test row, fit a Bayesian network on its k nearest training
    # rows and predict that row's label; prints the resulting accuracy and
    # returns nothing.
    #
    # NOTE(review): this is Python 2 code (print statement), and every empty
    # subscript (`[]`), empty column list and empty edge tuple was stripped
    # from this listing -- the function does not parse or run until they are
    # filled in.
    fileName = '';
    dataMat, dataLab = file2matrix(fileName, 9);
    trainMat = dataMat[];
    trainLab = np.array(dataLab[]);
    testMat = dataMat[];
    testLab = np.array(dataLab[]);
    # Weight of the position distance; with 1 the time distance is ignored.
    coef = 1;
    # Pairwise test-vs-train distances, one matrix per distance function.
    distPos = np.zeros((testMat.shape[0], trainMat.shape[0]));
    distTim = np.zeros((testMat.shape[0], trainMat.shape[0]));
    for i in range(testMat.shape[0]):
        for j in range(trainMat.shape[0]):
            distPos[i,j] = distSLC(testMat[i], trainMat[j]);
            distTim[i,j] = disTim(testMat[i], trainMat[j]);
    distPosNor = dataNorm(distPos);
    distTimNor = dataNorm(distTim);
    distAll = distPosNor*coef + distTimNor*(1-coef);
    distIndex = distAll.argsort();
    # Columns 2..5 of the test rows, converted to int32.
    testI = np.zeros((testMat.shape[0], 4), dtype='int32');
    count = 0;
    for i in testMat[:, 2:6]:
        testI[count,:] = map(int, i);
        count += 1;
    testInput = pd.DataFrame(testI, columns=[]);
    # For each test row, the training rows at its k smallest distances.
    trainMatK = trainMat[distIndex[:,0:k]];
    labelPre = [];
    for i in range(len(trainMatK)):
        num = 0;
        # Columns 2, 3, 4, 5 and 8 of the neighbours, converted to int32.
        trainI = np.zeros((trainMatK[0].shape[0], 5), dtype='int32');
        for j in trainMatK[i][:, [2,3,4,5,8]]:
            trainI[num, :] = map(int, j);
            num += 1;
        trainFraK = pd.DataFrame(trainI,columns=[]);
        trainInput = trainFraK[[]];
        # A fresh model is fitted on the neighbours of each test row.
        model = BayesianModel([(),(),(), ()]);
        model.fit(trainInput);
        a = pd.DataFrame([testInput.ix[i].values.tolist()], columns=[]);
        labelPre.append(model.predict(a).values[0][0]);
#     for i in range(len(testLakK)):
#         labels = testLakK[i];
#         labelPre.append(getLabel(labels));
    # Count predicted labels that match the reference labels.
    count = 0;
    #print labelPre;
    for i in range(len(labelPre)):
        if labelPre[i]==testLab[i]:
            count += 1;
    print '准确度:', float(count)/len(testLab);