def make_model_adult(data):
    """Return the hand-specified Bayesian network for the Adult data set.

    Parameters
    ----------
    data
        Not consulted; the structure below is fixed.  (A commented-out
        variant in the original learned the structure from ``data`` with a
        PC estimator instead.)

    Returns
    -------
    BayesianModel
        Model containing exactly the directed edges listed below.
    """
    return BayesianModel([
        ('education.num', 'Income'),
        ('education.num', 'capital.loss'),
        ('capital.loss', 'Income'),
        ('capital.gain', 'Income'),
        ('capital.loss', 'age'),
        ('capital.loss', 'marital.status'),
        ('marital.status', 'age'),
        ('Gender', 'Income'),
        ('race', 'native.country'),
    ])
def create_bayes_net(file, keep_atts, edges):
    """Build a Bayesian network over selected CSV columns and fit its CPDs.

    Parameters
    ----------
    file
        Passed to ``pd.read_csv``.
    keep_atts
        Column selection applied to the loaded frame; the remaining columns
        become the nodes of the network.
    edges
        Directed edges handed to ``add_edges_from``.

    Returns
    -------
    BayesianModel
        The model after ``fit`` has estimated the CPD tables from the data.
    """
    frame = pd.read_csv(file)[keep_atts]

    net = BayesianModel()
    net.add_nodes_from(frame.columns)
    # Structure first, then parameters estimated for that structure.
    net.add_edges_from(edges)
    net.fit(frame)
    return net
def bayeSian(k):
    """Predict a label with a Bayesian network, then estimate a value by k-NN.

    NOTE(review): this is Python 2 code (``print`` statements, ``map``
    assigned directly into an array row, ``DataFrame.ix``).  Every empty
    subscript ``[]`` below is an index expression that is missing from the
    source, and ``columns_name`` looks like a redacted placeholder; the
    function cannot run until those are restored.

    Parameters
    ----------
    k : int
        Number of nearest rows whose column 7 is averaged per test row.

    Prints ``k``, the fraction of predicted labels equal to ``testLab``,
    and the values returned by ``calMse`` and ``calMape``.  Returns nothing.
    """
    fileName = '文件名';
    # Meaning of the second argument (9) is defined by file2matrix -- TODO confirm.
    dataMat, dataLab = file2matrix(fileName, 9);
    durAct = dataMat[];
    testMat = dataMat[];
    count = 0;
    # testMat = dataMat[];
    testLab = np.array(dataLab[]);
    trainFraK = pd.DataFrame(dataMat,columns=[columns_name]);
    trainFra = trainFraK.ix[];
    # data_cla0 = trainFraK[trainFraK['T_TYPE']==0].values;
    # data_cla1 = trainFraK[trainFraK['T_TYPE']==1].values;
    trainInput = trainFraK[[columns_name]];
    # Copy the selected columns row by row into an int64 array (4 columns).
    trainArr = np.zeros((dataMat.shape[0], 4), dtype='int64');
    for arr in trainInput.values:
        trainArr[count, :]= map(int, arr);
        count += 1;
    trainInput = pd.DataFrame(trainArr, columns=[columns_name]);
    test = trainInput[];
    test = test.copy();
    # 'T_TYPE' is the column to be predicted, so it is removed from the test frame.
    test.drop('T_TYPE', axis=1, inplace=True);
    model = BayesianModel([('columns_name','columns_name'),('columns_name', 'columns_name'), ('columns_name', 'columns_name')]);
    model.fit(trainInput.ix[]);
    labelPre = model.predict(test);
    durPre = [];
    # With coef = 0.0 only the disTim-based distance contributes to distAll.
    coef = 0.0;
    for i in range(len(testMat)):
        # Restrict the candidates to training rows whose T_TYPE equals the predicted one.
        dataSet = trainFra[trainFra['T_TYPE']==labelPre['T_TYPE'][]].values;
        distPos = np.zeros(dataSet.shape[0]);
        distTim = np.zeros(dataSet.shape[0]);
        for j in range(dataSet.shape[0]):
            distPos[j] = distSLC(testMat[i], dataSet[j]);
            distTim[j] = disTim(testMat[i], dataSet[j]);
        distPosNor = distPos;#dataNorm(distPos);
        distTimNor = dataNorm(distTim);
        distAll = distPosNor*coef + distTimNor*(1-coef);
        knnIndex = distAll.argsort();
        # Average column 7 of the k rows with the smallest combined distance.
        durKnn = dataSet[knnIndex, 7][:k];
        durPre.append(sum(durKnn)/len(durKnn));
    mse = calMse(durPre, durAct);
    mape = calMape(durPre, durAct);
    count = 0;
    #print labelPre.values.tolist();
    # Count the predictions that match the reference labels.
    for i in range(len(labelPre)):
        if labelPre.values[i]==testLab[i]:
            count += 1;
    print 'K: ', k;
    # The Chinese label printed below reads "accuracy".
    print '准确度: ', float(count)/len(testLab);
    print 'MSE: ', mse;
    print 'MAPE: ', mape;
    print '----------------------------------------------------------------------';
def make_DAG(DAG, CPD=None, checkmodel=True, verbose=3):
    """Create a Directed Acyclic Graph from an edge list.

    Parameters
    ----------
    DAG : list
        Edges as ``[('A','B'), ('B','C')]``.  An existing pgmpy model is
        accepted unchanged, and a dict is replaced by its ``'model'`` entry.
    CPD : list, array-like
        TabularCPD for each node.  A single non-list value is wrapped in a
        list.
    checkmodel : bool
        Print the result of ``check_model()`` after the CPDs are added.
        The default is True.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: None, 1: ERROR, 2: WARN, 3: INFO (default), 4: DEBUG, 5: TRACE

    Raises
    ------
    Exception
        If ``DAG`` is neither a list nor a pgmpy object.

    Returns
    -------
    dict
        ``'adjmat'``: adjacency matrix from ``_dag2adjmat``;
        ``'model'``: the pgmpy model of the DAG.
    """
    if CPD is not None and not isinstance(CPD, list):
        CPD = [CPD]
    if isinstance(DAG, dict):
        DAG = DAG.get('model', None)

    is_pgmpy_object = 'pgmpy' in str(type(DAG))
    if is_pgmpy_object:
        if verbose >= 3:
            print('[bnlearn] >No changes made to existing Bayesian DAG.')
    elif isinstance(DAG, list):
        if verbose >= 3:
            print('[bnlearn] >Bayesian DAG created.')
        DAG = BayesianModel(DAG)
    else:
        raise Exception("[bnlearn] >Error: Input DAG should be a list. in the form [('A','B'), ('B','C')] or a <pgmpy.models.BayesianModel.BayesianModel>")

    if CPD is not None:
        for cpd in CPD:
            DAG.add_cpds(cpd)
            if verbose >= 3:
                print('[bnlearn] >Add CPD: %s' %(cpd.variable))
        # The model is only validated once CPDs have been attached.
        if checkmodel:
            print('[bnlearn] >Model correct: %s' %(DAG.check_model()))

    # Adjacency matrix and model are returned together.
    return {'adjmat': _dag2adjmat(DAG), 'model': DAG}
def generic_model(coin_set):
    """Build a generative model around a fixed A, B, C -> D network.

    Parameters
    ----------
    coin_set
        Forwarded to ``SensoryInputCoin``.

    Returns
    -------
    GenerativeModel
        Wraps the sensory input and the four-node network defined here.
    """
    network = BayesianModel([(parent, 'D') for parent in ('A', 'B', 'C')])

    # Binary root nodes and their probability tables.
    root_tables = (
        ('A', [[0.9, 0.1]]),
        ('B', [[0.1, 0.9]]),
        ('C', [[0.1, 0.9]]),
    )
    cpds = [
        TabularCPD(variable=name, variable_card=2, values=table)
        for name, table in root_tables
    ]
    # D is conditioned on all three binary parents (8 columns).
    cpds.append(
        TabularCPD(
            variable='D',
            variable_card=2,
            values=[[0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.9, 0.9],
                    [0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.1, 0.1]],
            evidence=['A', 'B', 'C'],
            evidence_card=[2, 2, 2],
        )
    )
    network.add_cpds(*cpds)
    network.check_model()

    return GenerativeModel(SensoryInputCoin(coin_set), network)
def get_model(self):
    """
    Returns the model instance of the ProbModel.

    Return
    ---------------
    model: an instance of BayesianModel.

    Raises
    ------
    ValueError
        If ``self.probnet['type']`` is not ``"BayesianNetwork"``.

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    if self.probnet.get('type') != "BayesianNetwork":
        raise ValueError("Please specify only Bayesian Network.")

    net_variables = self.probnet['Variables']
    model = BayesianModel(self.probnet['edges'].keys())

    tabular_cpds = []
    for potential in self.probnet['Potentials']:
        # The first key of the potential is the variable the table belongs to;
        # the value stored under it lists the conditioning variables.
        var = list(potential['Variables'].keys())[0]
        n_states = len(net_variables[var]['States'])
        evidence = potential['Variables'][var]
        evidence_card = [len(net_variables[ev]['States']) for ev in evidence]

        flat = np.array([float(tok) for tok in potential['Values'].split()])
        table = flat.reshape((n_states, flat.size // n_states))
        tabular_cpds.append(
            TabularCPD(var, n_states, table, evidence, evidence_card))
    model.add_cpds(*tabular_cpds)

    # Copy the per-variable and per-edge properties onto the graph.
    for node in model.nodes():
        for key, value in net_variables[node].items():
            model.node[node][key] = value
    for edge in model.edges():
        for key, value in self.probnet['edges'][edge].items():
            model.edge[edge[0]][edge[1]][key] = value

    return model
def pgmpy_test2():
    """Fit CPDs on random binary data and print them.

    Based on the pgmpy example notebook
    https://github.com/pgmpy/pgmpy/blob/dev/examples/Learning%20from%20data.ipynb
    """
    # Random data: 1000 rows, five variables, each drawn from {0, 1}.
    columns = ['D', 'I', 'G', 'L', 'S']
    samples = np.random.randint(low=0, high=2, size=(1000, 5))
    frame = pd.DataFrame(samples, columns=columns)

    net = BayesianModel([('D', 'G'), ('I', 'G'), ('I', 'S'), ('G', 'L')])

    # Learn the CPDs with the maximum likelihood estimator.
    net.fit(frame, estimator=MaximumLikelihoodEstimator)
    for table in net.get_cpds():
        print("CPD of {variable}:".format(variable=table.variable))
        print(table)
def make_model_adult(data):
    """Return the hand-specified network for the encoded Adult data set.

    Parameters
    ----------
    data
        Not consulted; the structure below is fixed.  (A commented-out
        variant in the original learned the structure from ``data`` with a
        PC estimator instead.)

    Returns
    -------
    BayesianModel
        Model containing exactly the directed edges listed below.
    """
    return BayesianModel([
        ('Gender', 'Income'),
        ('Gender', 'marital.status_0'),
        ('marital.status_0', 'Income'),
        ('Gender', 'workclass_1'),
        ('education.num', 'Income'),
        ('age', 'marital.status_1'),
        ('capital.gain', 'Income'),
        ('capital.gain', 'hours.per.week'),
        ('capital.loss', 'Income'),
        ('age', 'workclass_0'),
        ('age', 'hours.per.week'),
        ('native.country_1', 'education.num'),
        ('native.country_0', 'education.num'),
    ])
def __init__(self, case):
    """Set up empty graphs, bookkeeping containers and helper objects.

    Parameters
    ----------
    case
        Stored as ``self.case`` and forwarded to ``Utilities``.
    """
    self.case = case
    self.results = []

    # Each graph exists twice: as a networkx digraph and as a pgmpy model.
    self.networx_test, self.pgmpy_test = nx.DiGraph(), BayesianModel()
    self.networx, self.pgmpy = nx.DiGraph(), BayesianModel()

    # Best result so far; the third slot holds the DiGraph class itself
    # (not an instance) as its initial value.
    self.best_error = math.inf
    self.best_topology = [0, 0, nx.DiGraph]

    self.dictionary = []
    self.header = {}

    # Nodes, edges and CPDs, all initially empty.
    self.nodes_0, self.edges_0 = [], {}
    self.nodes, self.edges = [], {}
    self.cpds = {}

    # Colour bookkeeping.
    self.colors_dictionary = {}
    self.colors_table = []
    self.colors_cpd = []
    self.learning_data = {}
    self.nummber_of_colors = 0

    # Helpers; the lattice helper is built on top of the utilities object.
    self._util = Utilities(case)
    self._lat = Lattices(self._util)
def __init__(self, current_loc):
    """Load the objects of one location and build the object network.

    Parameters
    ----------
    current_loc : str
        Name of the location; ``current_loc + '.csv'`` is read from
        ``current_folder`` through ``self.read_csv``.

    The network links every object in ``men_objs`` and ``table_objs`` to
    the ``'table'`` node; the CPDs are then attached by ``build_cpds``.
    """
    self.anchor_objs = ['table', 'man']
    self.men_objs = ['hand', 'head', 'hat', 'bed']
    self.table_objs = ['laptop', 'banana', 'book', 'chair', 'paper']
    self.current_loc_objs = self.read_csv(
        current_folder / (current_loc + '.csv'))

    # The structure is defined by passing the list of edges to the model:
    # one arc from each candidate object to 'table'.
    all_arcs = itertools.product(self.men_objs + self.table_objs, ['table'])
    self.model = BayesianModel(all_arcs)
    self.build_cpds()
def createBayesianNetwork():
    """Build the eight-node binary network defined below, with its CPDs.

    Returns
    -------
    BayesianModel
        Network over asia, tub, smoke, lung, bronc, either, xray and dysp
        with all CPDs attached; ``check_model()`` is called before the
        model is returned.
    """
    def binary_cpd(name, table, parents=None):
        # Every variable here has two states, and so does every parent.
        if parents is None:
            return TabularCPD(variable=name, variable_card=2, values=table)
        return TabularCPD(variable=name, variable_card=2, values=table,
                          evidence=parents,
                          evidence_card=[2] * len(parents))

    # Network structure.
    model = BayesianModel([
        ('asia', 'tub'),
        ('smoke', 'bronc'),
        ('smoke', 'lung'),
        ('lung', 'either'),
        ('tub', 'either'),
        ('bronc', 'dysp'),
        ('either', 'xray'),
        ('either', 'dysp'),
    ])

    # Parameters, in the order they are attached to the network.
    cpds = [
        binary_cpd('asia', [[0.01], [0.99]]),
        binary_cpd('smoke', [[0.5], [0.5]]),
        binary_cpd('tub', [[0.05, 0.99], [0.95, 0.01]], ['asia']),
        binary_cpd('lung', [[0.1, 0.99], [0.9, 0.01]], ['smoke']),
        binary_cpd('bronc', [[0.6, 0.7], [0.4, 0.3]], ['smoke']),
        binary_cpd('either',
                   [[1.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]],
                   ['lung', 'tub']),
        binary_cpd('xray', [[0.98, 0.95], [0.02, 0.05]], ['either']),
        binary_cpd('dysp',
                   [[0.9, 0.7, 0.8, 0.1], [0.1, 0.3, 0.2, 0.9]],
                   ['bronc', 'either']),
    ]

    model.add_cpds(*cpds)
    model.check_model()
    return model
def create_model_and_inference():
    """Build a dependency network from ``dependencies.csv`` and fit it.

    The edge list is collected by walking the dependency table starting at
    ``'myproximus-usage'``; every collected edge is then reversed.  The
    network is fitted on random binary placeholder data (11 samples).

    Returns
    -------
    tuple
        ``(model, VariableElimination(model))``.
    """
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        # Depth-first walk: the cells from the fourth column onward in the
        # first row matching `source` are looked up in 'Column1' to find
        # the names they point at.
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if target_df.empty:
                continue
            target = target_df.item()
            # Skip an edge whose reverse is already present.
            if (target, source) not in edgelist:
                edgelist.append((source, target))
                connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    # Flip the direction of every collected edge.
    edges = [(t[1], t[0]) for t in edges]
    nodes = set(itertools.chain.from_iterable(edges))

    nodes_df = dep_df.iloc[:, 1].to_frame()
    # .copy() so the column assignments below write to an independent frame
    # rather than to a filtered view of the original.
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)].copy()
    # Eleven columns ('0'..'10') of random 0/1 values; the length 64 is
    # hard-coded in the source.
    for sample in range(11):
        nodes_df[str(sample)] = pd.DataFrame(
            data=np.random.randint(0, 2, size=64).T)
    # After the transpose each node is a column and each sample a row.
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            # pgmpy rejects edges that would create a loop with ValueError;
            # any other error is no longer swallowed.
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")
    draw_network(model.nodes(), model.edges(), {}, [])
    return model, VariableElimination(model)
def setUp(self):
    """Create the A -> C <- B model, two data sets and three estimators."""
    collider = BayesianModel([("A", "C"), ("B", "C")])
    small = pd.DataFrame(
        data={"A": [0, 0, 1], "B": [0, 1, 0], "C": [1, 1, 0]})
    # Ten rows; A takes three values and B is string-valued.
    large = pd.DataFrame(
        data={
            "A": [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
            "B": ["X", "Y", "X", "Y", "X", "Y", "X", "Y", "X", "Y"],
            "C": [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        }
    )
    # State names that include values absent from the small data set.
    explicit_states = {"A": [0, 1, 2], "B": [0, 1], "C": [0, 1, 23]}

    self.m1 = collider
    self.d1 = small
    self.d2 = large
    self.est1 = BayesianEstimator(collider, small)
    self.est2 = BayesianEstimator(
        collider, small, state_names=explicit_states)
    self.est3 = BayesianEstimator(collider, large)
def test_state_names2(self):
    """The MLE CPD of 'Color' equals the expected table for mixed-type data."""
    model = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
    frame = pd.DataFrame(
        data={
            'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
            'Light?': [True, True, False, False, True],
            'Color': ['red', 'green', 'black', 'black', 'yellow']
        })
    expected_table = [
        [1, 0, 1, 0],
        [0, 0.5, 0, 0],
        [0, 0.5, 0, 0],
        [0, 0, 0, 1],
    ]
    expected = TabularCPD(
        'Color', 4, expected_table,
        evidence=['Fruit', 'Light?'], evidence_card=[2, 2])

    estimator = MaximumLikelihoodEstimator(model, frame)
    self.assertEqual(estimator.estimate_cpd('Color'), expected)
def __init__(self, parameters=None):
    """Build the pgmpy network described by ``self.parameters``.

    Parameters
    ----------
    parameters
        Forwarded to the parent initialiser.  Afterwards
        ``self.parameters['network']`` is passed to ``BayesianModel`` and
        ``self.parameters['conditional_probabilities']`` is iterated as
        ``(node_id, values)`` pairs, one CPD per pair.
    """
    super().__init__(parameters)

    # Set up the network based on the parameters.
    self.pgm = BayesianModel(self.parameters['network'])

    # TODO -- add 'evidence' -- get from network?
    cpds = [
        TabularCPD(variable=node_id,
                   variable_card=len(values),
                   values=values,
                   evidence=[])
        for node_id, values in self.parameters['conditional_probabilities']
    ]
    # add_cpds takes the CPDs as separate positional arguments, so the
    # collection has to be unpacked rather than passed as one object.
    self.pgm.add_cpds(*cpds)
def test_state_names1(self):
    """The MLE CPD of 'B' given 'A' equals the expected table."""
    model = BayesianModel([("A", "B")])
    frame = pd.DataFrame(data={
        "A": [2, 3, 8, 8, 8],
        "B": ["X", "O", "X", "O", "X"]
    })
    # One column per observed value of A (2, 3, 8).
    expected = TabularCPD(
        "B",
        2,
        [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
        evidence=["A"],
        evidence_card=[3],
    )

    estimator = MaximumLikelihoodEstimator(model, frame)
    self.assertEqual(estimator.estimate_cpd("B"), expected)
def fully_connected_model(nodes=None):
    """Connect every 'hypo' node to every 'obs'/'motor' node and fit.

    Parameters
    ----------
    nodes : list of str, optional
        Node names.  When empty or ``None`` the default set
        ``[BOREDOM, DESIRE, MOBILE, MOTOR_HYPO, LEFT_ARM]`` is used.

    Returns
    -------
    BayesianModel
        Network fitted on ``TRAINING_DATA`` with ``BayesianEstimator`` and
        a BDeu prior.
    """
    if not nodes:
        nodes = [BOREDOM, DESIRE, MOBILE, MOTOR_HYPO, LEFT_ARM]

    network = BayesianModel()
    network.add_nodes_from(nodes)

    # Roles are decided purely by substrings of the node names.
    sources = [name for name in nodes if 'hypo' in name]
    targets = [name for name in nodes if 'obs' in name or 'motor' in name]
    for source in sources:
        for target in targets:
            network.add_edge(u=source, v=target)

    network.fit(TRAINING_DATA, estimator=BayesianEstimator, prior_type="BDeu")
    return network
def setUp(self):
    """Create a four-node model plus one complete and one incomplete data set."""
    edges = [("A", "C"), ("B", "C"), ("D", "B")]
    complete = {
        "A": [0, 0, 1],
        "B": [0, 1, 0],
        "C": [1, 1, 0],
        "D": ["X", "Y", "Z"],
    }
    # Same shape, but with missing entries in A, C and D.
    with_missing = {
        "A": [0, NaN, 1],
        "B": [0, 1, 0],
        "C": [1, 1, NaN],
        "D": [NaN, "Y", NaN],
    }

    self.m1 = BayesianModel(edges)
    self.d1 = DataFrame(data=complete)
    self.d2 = DataFrame(data=with_missing)
def bayes_net_from_populational_data(data, independent_vars, dependent_vars):
    """Build a two-layer network and compute its CPDs from ``data``.

    Parameters
    ----------
    data
        Passed to ``BayesNetHelper`` for state names and CPD computation.
    independent_vars, dependent_vars
        Collections combined with ``|`` below (so set-like); every
        independent variable gets an edge to every dependent variable.

    Returns
    -------
    BayesianModel
        The model with one CPD per variable attached.
    """
    model = BayesianModel()
    model.add_nodes_from(independent_vars)
    for parent in independent_vars:
        for child in dependent_vars:
            model.add_edge(parent, child)

    state_names = BayesNetHelper.get_state_names_from_df(
        data, independent_vars | dependent_vars)
    cpd_list = [
        BayesNetHelper.compute_cpd(model, node, data, state_names)
        for node in independent_vars | dependent_vars
    ]
    model.add_cpds(*cpd_list)
    return model
def setUp(self):
    """Create a four-node model plus one complete and one incomplete data set."""
    edges = [('A', 'C'), ('B', 'C'), ('D', 'B')]
    complete = {
        'A': [0, 0, 1],
        'B': [0, 1, 0],
        'C': [1, 1, 0],
        'D': ['X', 'Y', 'Z'],
    }
    # Same shape, but with missing entries in A, C and D.
    with_missing = {
        'A': [0, NaN, 1],
        'B': [0, 1, 0],
        'C': [1, 1, NaN],
        'D': [NaN, 'Y', NaN],
    }

    self.m1 = BayesianModel(edges)
    self.d1 = DataFrame(data=complete)
    self.d2 = DataFrame(data=with_missing)
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build (or load from cache) the Bayesian network over the labels.

    Parameters
    ----------
    load : bool
        When true and ``graph.p`` exists in ``RUNNING_MODEL_DIR``, the
        pickled graph is loaded, checked and returned instead of rebuilt.
    subtree : bool
        Use ``_subtree_labels()`` as the label set; otherwise all keys of
        the dictionary returned by ``load_label_data()``.
    modelsdir
        Directory handed to ``get_model_cpds``.

    Returns
    -------
    BayesianModel
        The checked model.  A freshly built model is also pickled to
        ``graph.p``.
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'

    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE: unpickling executes code from the file; only load cache
        # files this program wrote itself.
        with open(graph_file, 'rb') as handle:
            G = pickle.load(handle)
        G.check_model()
        return G

    print('loading data...')
    training_labels, go_dict = load_label_data()
    if subtree:
        labels_list = _subtree_labels()
        print(labels_list)
    else:
        labels_list = go_dict.keys()

    print('adding nodes and edges...')
    G = BayesianModel()
    # Each label gets a companion '<label>_hat' node.
    G.add_edges_from([(label, label + '_hat') for label in labels_list])

    obo_graph = obonet.read_obo(OBODB_FILE)
    for label in labels_list:
        # Nodes returned by networkx.ancestors for this label, restricted
        # to the label set, each get an edge pointing at the label.
        children = [
            c for c in networkx.ancestors(obo_graph, label)
            if c in labels_list
        ]
        for child in children:
            G.add_edge(child, label)

    # The caller-supplied directory is honoured here.
    predicted_cpds = get_model_cpds(labels_list=labels_list,
                                    modelsdir=modelsdir)
    for cpd in predicted_cpds:
        G.add_cpds(cpd)

    true_label_cpds = get_true_label_cpds(training_labels, go_dict,
                                          labels_list=labels_list)
    for cpd in true_label_cpds:
        G.add_cpds(cpd)

    # Collect the nodes that ended up without a CPD, then drop them.
    remove_list = [node for node in G.nodes() if G.get_cpds(node) is None]
    for node in remove_list:
        if node in G:
            G.remove_node(node)

    G.check_model()
    with open(graph_file, 'wb') as handle:
        pickle.dump(G, handle)
    return G
def __init__(self, pnh, gh):
    """Initialise from a ``pnh`` handler object and a general handler.

    Parameters
    ----------
    pnh
        Supplies the data extractor, device, priority, dataframe, file
        writer and file suffix through its ``get_*`` methods.
    gh
        Stored as ``self.general_handler``.
    """
    data_extractor = pnh.get_data_extractor()

    # Models start out empty.
    self.best_model = BayesianModel()
    self.training_instances = ""

    self.device_considered = pnh.get_device()
    self.priority_considered = pnh.get_priority()
    self.markov = MarkovModel()
    self.general_handler = gh

    # Values taken from the data extractor.
    self.variables_names = data_extractor.get_variable_names()
    self.rankedDevices = data_extractor.get_ranked_devices()

    # Data and output settings taken from the handler.
    self.data = pnh.get_dataframe()
    self.file_writer = pnh.get_file_writer()
    self.file_suffix = pnh.get_file_suffix()
def create_bayes_net():
    """Build and fit the attribute network for the CelebA attribute list.

    Reads ``../../data/list_attr_celeba.csv``, keeps the ``KEEP_ATTS``
    columns as nodes, adds the fixed edges below and fits the CPDs.

    Returns
    -------
    BayesianModel
        The fitted model.
    """
    frame = pd.read_csv('../../data/list_attr_celeba.csv')[KEEP_ATTS]

    structure = [
        ('Young', 'Eyeglasses'),
        ('Young', 'Bald'),
        ('Young', 'Mustache'),
        ('Male', 'Mustache'),
        ('Male', 'Smiling'),
        ('Male', 'Wearing_Lipstick'),
        ('Young', 'Mouth_Slightly_Open'),
        ('Young', 'Narrow_Eyes'),
        ('Male', 'Narrow_Eyes'),
        ('Smiling', 'Narrow_Eyes'),
        ('Smiling', 'Mouth_Slightly_Open'),
        ('Young', 'Smiling'),
    ]

    net = BayesianModel()
    net.add_nodes_from(frame.columns)
    net.add_edges_from(structure)
    net.fit(frame)
    return net
def test_get_cpds1(self):
    """get_cpds returns the CPD of a node, raises for an unknown node and
    returns None for a node that has no CPD."""
    self.model = BayesianModel([('A', 'AB')])
    root_cpd = TabularCPD('A', 2, values=np.random.rand(2, 1))
    child_cpd = TabularCPD('AB', 2, values=np.random.rand(2, 2),
                           evidence=['A'], evidence_card=[2])
    self.model.add_cpds(root_cpd, child_cpd)

    for name in ('A', 'AB'):
        self.assertEqual(self.model.get_cpds(name).variable, name)

    # 'B' is not in the graph yet.
    with self.assertRaises(ValueError):
        self.model.get_cpds('B')

    # Once added as a bare node it has no CPD attached.
    self.model.add_node('B')
    self.assertIsNone(self.model.get_cpds('B'))
def make_bayes_model(self):
    """
    :return: the Bayesian network model built from the historical information
    """
    model = BayesianModel(self.tree)

    # Add every child node to the Bayesian network.
    for entry in self.pro:
        model.add_cpds(self.node_pro(float(entry[1]), entry[0]))

    # Root node: two states with probabilities (1 - p, p).
    root_probability = float(self.root_pro)
    model.add_cpds(
        TabularCPD(
            variable=self.root_code,
            variable_card=2,
            values=[[1 - root_probability, root_probability]],
        )
    )
    return model
def model():
    """Define the Bayesian model.

    Returns
    -------
    BayesianModel
        Five-node network (a, b, c, d, e) with every entry of ``factors()``
        attached as a CPD.
    """
    # Factors to attach, looked up by key below.
    phi = factors()

    # A model in pgmpy is defined by a list of edges.
    network = BayesianModel([
        ('a', 'b'),
        ('a', 'e'),
        ('b', 'c'),
        ('c', 'd'),
        ('e', 'd'),
    ])
    for key in phi:
        network.add_cpds(phi[key])
    return network
def generate_approx_model_from_graph(ebunch, nodes, df):
    """
    Learn a pgmpy Bayesian model using the data of a pandas dataframe.
    The data is shuffled first.

    Every variable named in ``ebunch`` or ``nodes`` is declared with the
    states ``[0, 1]``; fitting uses ``SmoothedMaximumLikelihoodEstimator``.
    """
    shuffled = df.sample(frac=1)

    approx_model = BayesianModel(ebunch)
    approx_model.add_nodes_from(nodes)

    # Endpoints of every edge first, then the explicitly listed nodes.
    state_names = {
        name: [0, 1]
        for pair in ebunch
        for name in (pair[0], pair[1])
    }
    state_names.update({node: [0, 1] for node in nodes})

    approx_model.fit(shuffled, state_names=state_names,
                     estimator=SmoothedMaximumLikelihoodEstimator)
    return approx_model
def test_get_cpds1(self):
    """get_cpds returns the CPD of a node, raises for an unknown node and
    returns None for a node that has no CPD."""
    self.model = BayesianModel([("A", "AB")])
    root_cpd = TabularCPD("A", 2, values=np.random.rand(2, 1))
    child_cpd = TabularCPD("AB", 2, values=np.random.rand(2, 2),
                           evidence=["A"], evidence_card=[2])
    self.model.add_cpds(root_cpd, child_cpd)

    for name in ("A", "AB"):
        self.assertEqual(self.model.get_cpds(name).variable, name)

    # "B" is not in the graph yet.
    with self.assertRaises(ValueError):
        self.model.get_cpds("B")

    # Once added as a bare node it has no CPD attached.
    self.model.add_node("B")
    self.assertIsNone(self.model.get_cpds("B"))
def get_model(self):
    """
    Returns an instance of Bayesian Model or Markov Model.
    Variables are in the pattern var_0, var_1, var_2 where var_0 is
    0th index variable, var_1 is 1st index variable.

    Return
    ------
    model: an instance of Bayesian or Markov Model, or ``None`` when
        ``self.network_type`` is neither ``'BAYES'`` nor ``'MARKOV'``.

    Examples
    --------
    >>> reader = UAIReader('TestUAI.uai')
    >>> reader.get_model()
    """
    kind = self.network_type

    if kind == 'BAYES':
        model = BayesianModel()
        model.add_nodes_from(self.variables)
        model.add_edges_from(self.edges)

        tabular_cpds = []
        for table in self.tables:
            child_var = table[0]
            n_states = int(self.domain[child_var])
            flat = np.array([float(entry) for entry in table[1]])
            # One row per state of the child variable.
            grid = flat.reshape(n_states, flat.size // n_states)
            tabular_cpds.append(TabularCPD(child_var, n_states, grid))

        model.add_cpds(*tabular_cpds)
        return model

    if kind == 'MARKOV':
        model = MarkovModel(self.edges)

        factors = []
        for table in self.tables:
            scope = table[0]
            factors.append(
                DiscreteFactor(
                    variables=scope,
                    cardinality=[int(self.domain[var]) for var in scope],
                    values=[float(entry) for entry in table[1]],
                )
            )

        model.add_factors(*factors)
        return model

    return None
def kNN(k):
    """Predict a label per test row from a network fitted on its k neighbours.

    NOTE(review): this is Python 2 code (``print`` statement, ``map``
    assigned directly into an array row, ``DataFrame.ix``).  Every empty
    subscript ``[]``, empty ``columns=[]`` list and empty edge tuple ``()``
    below looks like content missing from the source; the function cannot
    run until those are restored.

    Parameters
    ----------
    k : int
        Number of nearest training rows used to fit the per-row model.

    Prints the fraction of predicted labels equal to ``testLab``.
    Returns nothing.
    """
    fileName = '';
    # Meaning of the second argument (9) is defined by file2matrix -- TODO confirm.
    dataMat, dataLab = file2matrix(fileName, 9);
    trainMat = dataMat[];
    trainLab = np.array(dataLab[]);
    testMat = dataMat[];
    testLab = np.array(dataLab[]);
    # With coef = 1 only the distSLC-based distance contributes to distAll.
    coef = 1;
    # Pairwise distances: one row per test sample, one column per training sample.
    distPos = np.zeros((testMat.shape[0], trainMat.shape[0]));
    distTim = np.zeros((testMat.shape[0], trainMat.shape[0]));
    for i in range(testMat.shape[0]):
        for j in range(trainMat.shape[0]):
            distPos[i,j] = distSLC(testMat[i], trainMat[j]);
            distTim[i,j] = disTim(testMat[i], trainMat[j]);
    distPosNor = dataNorm(distPos);
    distTimNor = dataNorm(distTim);
    distAll = distPosNor*coef + distTimNor*(1-coef);
    # For every test row, training indices ordered by increasing distance.
    distIndex = distAll.argsort();
    # Columns 2..5 of the test matrix, converted to int32.
    testI = np.zeros((testMat.shape[0], 4), dtype='int32');
    count = 0;
    for i in testMat[:, 2:6]:
        testI[count,:] = map(int, i);
        count += 1;
    testInput = pd.DataFrame(testI, columns=[]);
    # The k nearest training rows for each test row.
    trainMatK = trainMat[distIndex[:,0:k]];
    labelPre = [];
    for i in range(len(trainMatK)):
        num = 0;
        # Columns 2, 3, 4, 5 and 8 of the neighbours, converted to int32.
        trainI = np.zeros((trainMatK[0].shape[0], 5), dtype='int32');
        for j in trainMatK[i][:, [2,3,4,5,8]]:
            trainI[num, :] = map(int, j);
            num += 1;
        trainFraK = pd.DataFrame(trainI,columns=[]);
        trainInput = trainFraK[[]];
        # A fresh model is fitted on the neighbours of each test row.
        model = BayesianModel([(),(),(), ()]);
        model.fit(trainInput);
        a = pd.DataFrame([testInput.ix[i].values.tolist()], columns=[]);
        labelPre.append(model.predict(a).values[0][0]);
    # for i in range(len(testLakK)):
    #     labels = testLakK[i];
    #     labelPre.append(getLabel(labels));
    count = 0;
    #print labelPre;
    # Count the predictions that match the reference labels.
    for i in range(len(labelPre)):
        if labelPre[i]==testLab[i]:
            count += 1;
    # The Chinese label printed below reads "accuracy".
    print '准确度:', float(count)/len(testLab);