def estimate_distrib(skel, samples, query, evidence):
    learner = PGMLearner()
    bayesnet = learner.discrete_mle_estimateparams(skel, samples)
    tablecpd = TableCPDFactorization(bayesnet)
    fac = tablecpd.condprobve(query, evidence)
    df2 = printdist(fac, bayesnet)
    return df2
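A minimal usage sketch for estimate_distrib, assuming a libpgm input file "net.txt" (hypothetical) that holds both node data and structure, and the printdist helper from the surrounding project; the query/evidence node names below are made up for illustration:

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork

nd = NodeData()
nd.load("net.txt")            # hypothetical input file
skel = GraphSkeleton()
skel.load("net.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
samples = bn.randomsample(1000)
# hypothetical node names and values
df2 = estimate_distrib(skel, samples,
                       query={"Letter": ["weak", "strong"]},
                       evidence={"Grade": "A"})
print df2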
def bayesNetCont(textFile, unique):
    cleanText(textFile, 'tempOutput.txt')
    ## imports textFile into pandas
    try:
        df = pd.read_csv('tempOutput.txt', sep='\s+', dtype='float64', header=None)
    except:
        print 'next file'
        return
    df.fillna(0, inplace=True)
    df.convert_objects(convert_numeric=True)
    ## set to either setUnique() or setMax()
    if unique is True:
        grouped = setUnique(df)
    else:
        grouped = setMax(df)
    # turns into correct dictionary format for libpgm
    newDict = DFtoLibpgm(grouped)
    # instantiate my learner
    learner = PGMLearner()
    # estimate structure - gaussian
    try:
        result = learner.lg_constraint_estimatestruct(newDict)
    except:
        print 'error'
        return
    # output
    return result
def learn_net(data):
    '''learns Bayes net on raw data'''
    data_dict = data.to_dict('records')
    learner = PGMLearner()
    skel = learner.discrete_constraint_estimatestruct(data=data_dict, indegree=1)
    skel.toporder()
    disc_bayes_net = learner.discrete_mle_estimateparams(graphskeleton=skel, data=data_dict)
    return disc_bayes_net
def learn_net_discretize(data, vars_to_discretize, n_bins):
    '''learn Bayes net after selected variables have been discretized'''
    data_subset, bins = discretize(data, vars_to_discretize, n_bins=n_bins)
    data_dict = data_subset.to_dict('records')
    learner = PGMLearner()
    skel = learner.discrete_constraint_estimatestruct(data=data_dict, indegree=1)
    skel.toporder()
    disc_bayes_net = learner.discrete_mle_estimateparams(graphskeleton=skel, data=data_dict)
    return disc_bayes_net, bins
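A hedged usage sketch for learn_net above, using a toy pandas DataFrame (column names and values hypothetical, and far too few rows for a meaningful structure search):

import pandas as pd

df = pd.DataFrame({'A': [0, 1, 0, 1, 1, 0],
                   'B': [1, 1, 0, 0, 1, 0]})
bn = learn_net(df)
print bn.E        # learned edge list
print bn.Vdata    # learned CPDs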
def anomaly_libpgm():
    files = glob.glob(join('data', '*.txt'))
    for file in files[0:1]:
        print file
        data = read_data_libpgm(file)
        learner = PGMLearner()
        result = learner.lg_estimatebn(data, indegree=3)
        print result.E
def test_libpgm(df1):
    data = df1.T.to_dict().values()
    #pprint(data)
    skel = GraphSkeleton()
    skel.load("bn_struct.txt")
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, data)
    print json.dumps(result.Vdata, indent=2)
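For reference, the "bn_struct.txt" file loaded above follows libpgm's skeleton format: a JSON dictionary with a vertex list "V" and an edge list "E". A minimal sketch (node names hypothetical):

{
    "V": ["A", "B", "C"],
    "E": [["A", "B"],
          ["A", "C"]]
}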
def learnBN(fdata_array, bn_file):
    bn_path = os.path.join(experiment_dir, 'parameters', bn_file + '.txt')
    skel = GraphSkeleton()
    skel.load(bn_path)
    skel.toporder()
    learner = PGMLearner()
    bn = learner.discrete_mle_estimateparams(skel, fdata_array)
    return bn
def getBNparams(graph, ddata, n):
    # Gets discrete BN parameters given a graph skeleton.
    # The skeleton should include t-1 and t nodes for each variable.
    nodes = range(1, (n * 2) + 1)
    nodes = map(str, nodes)
    edges = gk.edgelist(graph)
    for i in range(len(edges)):
        edges[i] = list([edges[i][0], str(n + int(edges[i][1]))])
    skel = GraphSkeleton()
    skel.V = nodes
    skel.E = edges
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, ddata)
    return result
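A hedged call sketch for getBNparams with n = 2 variables, so the skeleton nodes '1' and '2' form the t-1 slice and '3' and '4' the t slice; graph and ddata are assumed to be produced elsewhere in the surrounding project:

# `graph` and `ddata` come from the surrounding project (assumption)
result = getBNparams(graph, ddata, 2)
print result.Vdata['3']   # CPD of the first variable at time t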
def createData():
    nd = NodeData()
    skel = GraphSkeleton()
    fpath = "job_interview.txt"
    nd.load(fpath)
    skel.load(fpath)
    skel.toporder()
    bn = DiscreteBayesianNetwork(skel, nd)
    learner = PGMLearner()
    data = bn.randomsample(1000)
    X, Y = 'Grades', 'Offer'
    c, p, w = learner.discrete_condind(data, X, Y, ['Interview'])
    print "independence between X and Y: ", c, " p-value ", p, " witness node: ", w
    result = learner.discrete_constraint_estimatestruct(data)
    print result.E
def em(data, bn, skel):
    lk_last = 100
    times = 0
    while 1:
        d2 = data_with_hidden(data, bn)
        learner = PGMLearner()  # toolbox
        bn = learner.discrete_mle_estimateparams(skel, d2)  # toolbox
        lk = likelihood(d2, bn)
        print "LogLikelihood:", lk
        times += 1
        if abs((lk - lk_last) / lk_last) < 0.001:
            break
        lk_last = lk
    print times
    return bn
def buildBN(trainingData, binstyleDict, numbinsDict, **kwargs):
    # need to modify to accept skel or skelfile
    discretized_training_data, bin_ranges = discretizeTrainingData(
        trainingData, binstyleDict, numbinsDict, True)
    print 'discrete training ', discretized_training_data
    if 'skel' in kwargs:
        # load file into skeleton
        if isinstance(kwargs['skel'], basestring):
            skel = GraphSkeleton()
            skel.load(kwargs['skel'])
            skel.toporder()
        else:
            skel = kwargs['skel']
    # learn bayesian network
    learner = PGMLearner()
    # baynet = learner.discrete_mle_estimateparams(skel, discretized_training_data)
    # baynet = discrete_estimatebn(learner, discretized_training_data, skel, 0.05, 1)
    # using discrete_mle_estimateparams2 written as a function in this file, not calling from libpgm
    baynet = discrete_mle_estimateparams2(skel, discretized_training_data)
    return baynet
def net2():
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load("net.txt")    # an input file
    skel.load("net.txt")
    # topologically order graphskeleton
    skel.toporder()
    # load bayesian network
    lgbn = LGBayesianNetwork(skel, nd)
    in_data = read_data.getdata2()
    learner = PGMLearner()
    bn = learner.lg_mle_estimateparams(skel, in_data)
    p = cal_prob(in_data[300:500], bn)
    print p
    return 0
def bayesNet(textFile):
    cleanText(textFile, 'tempOutput.txt')
    ## imports textFile into pandas
    try:
        df = pd.read_csv('tempOutput.txt', sep='\s+', dtype='float32', header=None)
    except:
        print 'next file'
        return
    df.fillna(0, inplace=True)
    df.convert_objects(convert_numeric=True)
    ##
    for i, row in df.iterrows():
        print df.ix[0, i]
        df.ix[0, i] = df.ix[0, i] + str(i)
    grouped = df.set_index([0], verify_integrity=True)
    df2 = grouped.to_dict()
    print json.dumps(df2, indent=2)
    newDict = []
    for key in df2.keys():
        newDict.append(df2[key])
    #print json.dumps(newDict, indent=2)
    # instantiate my learner
    learner = PGMLearner()
    # estimate structure
    result = learner.lg_constraint_estimatestruct(newDict)
    # output
    return json.dumps(result.E, indent=2)
def setUp(self):
    # instantiate learner
    self.l = PGMLearner()

    # generate graph skeleton
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()

    # generate sample sequence to try to learn from - discrete
    nd = NodeData.load("unittestdict.txt")
    self.samplediscbn = DiscreteBayesianNetwork(nd)
    self.samplediscseq = self.samplediscbn.randomsample(5000)

    # generate sample sequence to try to learn from - linear Gaussian
    nda = NodeData.load("unittestlgdict.txt")
    self.samplelgbn = LGBayesianNetwork(nda)
    self.samplelgseq = self.samplelgbn.randomsample(10000)

    self.skel = skel
def calc_accuracy(dff_train, dff_train_target, nb_iterations):
    result = np.zeros(nb_iterations)
    for itera in range(nb_iterations):
        XX_train, XX_test, yy_train, yy_test = train_test_split(
            dff_train, dff_train_target, test_size=0.33)
        data4bn = format_data(XX_train)
        learner = PGMLearner()
        # estimate parameters (`skel` is assumed to be defined at module scope)
        result_bn = learner.discrete_mle_estimateparams(skel, data4bn)
        #result_bn.Vdata
        result_predict = calc_BNprob(XX_test)
        BN_test_probs = pd.DataFrame()
        BN_test_probs['ground_truth'] = yy_test
        Test_prob = (pd.concat([yy_test.reset_index().Surv, result_predict],
                               axis=1, ignore_index=True)
                     .rename(columns={0: 'ground_truth', 1: 'class_resu'}))
        accuracy = Test_prob[Test_prob.ground_truth == Test_prob.class_resu].shape[0] / (1.0 * Test_prob.shape[0])
        #print("Accuracy is {}".format(accuracy))
        result[itera] = accuracy
    return result
def bayesNetDiscrete(textFile, quant_no, unique):
    cleanText(textFile, 'tempOutput.txt')
    ## imports textFile into pandas
    try:
        df = pd.read_csv('tempOutput.txt', sep='\s+', dtype='float64', header=None)
    except:
        print 'next file'
        return
    df.fillna(0, inplace=True)
    df.convert_objects(convert_numeric=True)
    ## set to either setUnique() or setMax()
    if unique is True:
        grouped = setUnique(df)
    else:
        grouped = setMax(df)
    ## quantiles is qcut(), fixed width divisions is cut()
    grouped = quantize(quant_no, grouped)
    # turns into correct dictionary format for libpgm
    newDict = DFtoLibpgm(grouped)
    # instantiate my learner
    learner = PGMLearner()
    # estimate structure
    try:
        result = learner.discrete_estimatebn(newDict)
    except:
        print 'error'
        #result = learner.discrete_estimatebn([dict([('a',1),('b',2)])])
        return
    # output
    return result
def main():
    # filename
    features_file = './../data/features.csv'
    # read data into list
    handwriting_features = postmaster.readCSVIntoListAsDict(features_file)
    # learn structure
    # instantiate learner
    learner = PGMLearner()
    pvalue = 0.25
    indegree = 1
    # estimate structure
    #result = learner.discrete_constraint_estimatestruct(
    #    handwriting_features, pvalue, indegree)
    result = learner.discrete_estimatebn(handwriting_features)
    #result = learner.discrete_condind(handwriting_features, 'f1', 'f2',
    #    ['f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9'])
    # output
    #print result.chi, result.pval, result.U
    #print json.dumps(result.E, indent=2)
    print json.dumps(result.Vdata, indent=2)
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("gaussGrades.txt")    # an input file
skel = GraphSkeleton()
skel.load("gaussGrades.txt")
skel.toporder()
lgbn = LGBayesianNetwork(skel, nd)
data = lgbn.randomsample(8000)
print data

# instantiate my learner
learner = PGMLearner()

# estimate structure
result = learner.lg_constraint_estimatestruct(data)

# output
print json.dumps(result.E, indent=2)
@author: himanshu
'''
import json
from networkx import DiGraph, draw
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner
import matplotlib.pyplot as plt

from data_extractor import DataExtractor

# generate some data to use
data_ext = DataExtractor('genome', format='json')
data = data_ext.get_data_vectors()
print 'Got data with ', len(data), ' vectors'

# instantiate my learner
learner = PGMLearner()

print 'learning the structure'
# estimate structure
result = learner.discrete_constraint_estimatestruct(data, pvalparam=0.02)

# output
print json.dumps(result.E, indent=2)

graph = DiGraph()
graph.add_edges_from(result.E)
draw(graph)
plt.show()
# vertices = set(mainFeatures)
# for i, sample in enumerate(training_arr):
#     newSample = {}
#     newSample['HIV'] = hiv_training_arr[i]
#     for k in sample.keys():
#         if k in vertices:
#             newSample[k] = sample[k]
#     condensed_feature_vectors.append(newSample)
################################################
# import pprint
# pp = pprint.PrettyPrinter(indent=4)
# pp.pprint(condensed_feature_vectors)

# instantiate learner
learner = PGMLearner()

# Voila, it makes us a bayesian network!
bayesian_networks_by_region = {}
for region in condensed_feature_vectors_by_region:
    bayesian_networks_by_region[region] = learner.lg_estimatebn(condensed_feature_vectors_by_region[region])
    print region
    print json.dumps(bayesian_networks_by_region[region].Vdata, indent=2)
    print json.dumps(bayesian_networks_by_region[region].E, indent=2)

# Evaluation:
predictions = []
test_arrs_by_region = {}
hiv_test_arrs_by_region = {}
for i, sample in enumerate(test_arr):
    region = getRegion(sample['Country'])
# output - toggle comment to see
#print json.dumps(result, indent=2)

# (8) --------------------------------------------------------------------------
# Learn the CPDs of a discrete Bayesian network, given data and a structure:

# say I have some data
data = bn.randomsample(200)

# and a graphskeleton
skel = GraphSkeleton()
skel.load("../tests/unittestdict.txt")

# instantiate my learner
learner = PGMLearner()

# estimate parameters
result = learner.discrete_mle_estimateparams(skel, data)

# output - toggle comment to see
#print json.dumps(result.Vdata, indent=2)

# (9) -------------------------------------------------------------------------
# Learn the structure of a discrete Bayesian network, given only data:

# say I have some data
data = bn.randomsample(2000)

# instantiate my learner
learner = PGMLearner()
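The snippet is cut off after instantiating the learner; in the libpgm examples, section (9) presumably continues by estimating the structure from data alone, along these lines:

# estimate structure
result = learner.discrete_constraint_estimatestruct(data)

# output - toggle comment to see
#print json.dumps(result.E, indent=2)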
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

nd = NodeData()
nd.load("nodedata.json")
skel = GraphSkeleton()
skel.load("nodedata.json")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)

with open("manipulatedata.json") as fp:
    data = json.load(fp)

learner = PGMLearner()
# result = learner.discrete_constraint_estimatestruct(data)
result = learner.discrete_estimatebn(data)

print json.dumps(result.E, indent=2)
print json.dumps(result.Vdata, indent=2)
          or di['HandStrength'] == '3ofakind'):
        di['HandStrength'] = 'Medium'
    elif (di['HandStrength'] == 'TwoPairs'
          or (di['HandStrength'] == 'OnePair'
              and (di['Rank'] == 'A' or di['Rank'] == 'K'
                   or di['Rank'] == 'Q' or di['Rank'] == 'J'))):
        di['HandStrength'] = 'Weak'
    else:
        di['HandStrength'] = 'VeryWeak'

print('################### PART A ################################')
print('*************** Network Parameters for BN agent1 *****************')
for skeleton in ["Poker_Network.txt"]:
    # loading skeleton of network from the given file
    skel = GraphSkeleton()
    skel.load(skeleton)
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, mdata)
    print json.dumps(result.Vdata, indent=2)

print('*************** Network Parameters for BN agent2 *****************')
for skeleton in ["Poker_Network.txt"]:
    skel = GraphSkeleton()
    skel.load(skeleton)
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, mdata2)
    print json.dumps(result.Vdata, indent=2)

print('########################## PART B ################################')
print('************** NB1 parameters for agent 1 *************************')
for skeleton in ["Poker_Network1.txt"]:
    skel = GraphSkeleton()
[{'Class': 3, 'Fare': 0, 'Sex': 1, 'Surv': 0},
 {'Class': 1, 'Fare': 1, 'Sex': 0, 'Surv': 1},
 {'Class': 3, 'Fare': 0, 'Sex': 0, 'Surv': 1},
 {'Class': 1, 'Fare': 1, 'Sex': 0, 'Surv': 1}, ...]

# In[ ]:

nd = NodeData()
skel = GraphSkeleton()
# The structure is defined in the file titanic_skel.json
jsonpath = "titanic_skel.json"
skel.load(jsonpath)

# instantiate the learner
learner = PGMLearner()

# The method estimates the parameters for a discrete Bayesian network with
# a structure given by graphskeleton, in order to maximize the probability
# of the data given by `data`
result_params = learner.discrete_mle_estimateparams(skel, training_data)

result_params.Vdata['Class']    # to inspect the network

# Check the prediction accuracy

# In[ ]:

#results = calc_accuracy(dff_train, dff_train_target, 100)
class TestPGMLearner(unittest.TestCase):

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1
                        and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15
                        and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        self.assertTrue(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
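To run the suite directly, the usual unittest entry point applies (a sketch; the original module may use a separate test runner instead):

if __name__ == '__main__':
    unittest.main()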
def fun(inputData):
    # Defining a data-formatting helper
    def format_data(df):
        result = []
        for row in df.itertuples():
            #print(row.Pclass)
            result.append(
                dict(great=row.great, good=row.good, clean=row.clean,
                     comfortable=row.comfortable, bad=row.bad, old=row.old,
                     Cleanliness=row.Cleanliness, Location=row.Location,
                     Service=row.Service, Rooms=row.Rooms, Value=row.Value,
                     Overall=row.Overall))
        return result

    # load all preprocessed training data
    df = pd.read_csv('features.csv', sep=',')

    # format data so it can be processed correctly by the libpgm functions
    node_data = format_data(df)

    skel = GraphSkeleton()
    # load the structure of our net
    skel.load("./our-skel.txt")
    # set the topological order
    skel.toporder()

    # learner which will estimate parameters and, if needed, net structure
    learner = PGMLearner()

    # estimating parameters for our own model
    res = learner.discrete_mle_estimateparams(skel, node_data)

    # get CPT
    a = TableCPDFactorization(res)

    # compose the query and evidence as dicts
    query = dict(Overall=1)
    # prepare dictionary of values (after the equals signs, fill in the values read from the GUI)
    evidence = dict(Value=inputData[0], Location=inputData[1],
                    Cleanliness=inputData[2], Service=inputData[3],
                    Rooms=inputData[4], bad=inputData[5], old=inputData[6],
                    good=inputData[7], great=inputData[8],
                    comfortable=inputData[9], clean=inputData[10])
    print(query)
    print(evidence)

    # run the query given evidence
    result = a.condprobve(query, evidence)
    print json.dumps(result.vals, indent=2)

    #res.Vdata["Overall"]["vals"][pos]
    #arr = []
    dizionario = {}
    for i in range(1, 6):
        dizionario[res.Vdata["Overall"]["vals"][i - 1]] = result.vals[i - 1]
        # arr.append(dizionario)
    #print(str(arr))
    return dizionario
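A hedged call sketch for fun; the eleven entries of inputData must be category values that actually occur in features.csv (the numbers below are hypothetical):

# hypothetical GUI readings: [Value, Location, Cleanliness, Service, Rooms,
#                             bad, old, good, great, comfortable, clean]
inputData = [3, 4, 5, 4, 4, 0, 0, 1, 1, 1, 1]
print fun(inputData)   # {Overall outcome: probability, ...}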
class PGMLearnerServer(object):
    def __init__(self):
        self.learner = PGMLearner()
        rospy.Service("~discrete/parameter_estimation", DiscreteParameterEstimation, self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation", DiscreteStructureEstimation, self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation", LinearGaussianParameterEstimation, self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation", LinearGaussianStructureEstimation, self.lg_structure_estimation_cb)

    def discrete_parameter_estimation_cb(self, req):
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.discrete_mle_estimateparams(skel, data)
        return DiscreteParameterEstimationResponse(U.discrete_nodes_to_ros(res.Vdata))

    def discrete_query_cb(self, req):
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}
        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res

    def discrete_structure_estimation_cb(self, req):
        states = [{ns.node: ns.state for ns in s.node_states} for s in req.states]
        pvalparam = 0.05  # default value
        indegree = 1      # default value
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.indegree != 0:
            indegree = req.indegree
        res = self.learner.discrete_constraint_estimatestruct(states,
                                                              pvalparam=pvalparam,
                                                              indegree=indegree)
        return DiscreteStructureEstimationResponse(U.graph_skeleton_to_ros(res))

    def lg_parameter_estimation_cb(self, req):
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.lg_mle_estimateparams(skel, data)
        rospy.logdebug("parameter estimation: %s" % res.Vdata)
        return LinearGaussianParameterEstimationResponse(U.linear_gaussian_nodes_to_ros(res.Vdata))

    def lg_structure_estimation_cb(self, req):
        states = [{ns.node: ns.state for ns in s.node_states} for s in req.states]
        rospy.logdebug(states)
        pvalparam = 0.05  # default value
        bins = 10         # default value
        indegree = 1      # default value
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.bins != 0:
            bins = req.bins
        if req.indegree != 0:
            indegree = req.indegree
        rospy.logdebug("bins: %d, pvalparam: %f, indegree: %d" % (bins, pvalparam, indegree))
        res = self.learner.lg_constraint_estimatestruct(states,
                                                        pvalparam=pvalparam,
                                                        bins=bins,
                                                        indegree=indegree)
        rospy.logdebug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        rospy.logdebug(res.V)
        rospy.logdebug(res.E)
        return LinearGaussianStructureEstimationResponse(U.graph_skeleton_to_ros(res))
                 Value=row.Value, Overall=row.Overall))
        # result.append(dict(great=row.great, good=row.good, nice=row.nice, clean=row.clean, helpful=row.helpful, comfortable=row.comfortable,
        #                    beautiful=row.beautiful, wonderful=row.wonderful, friendly=row.friendly, fantastic=row.fantastic, bad=row.bad,
        #                    Cleanliness=row.Cleanliness, Location=row.Location, Businessservice=row.Businessservice,
        #                    Checkin=row.Checkin, Service=row.Service, Rooms=row.Rooms, Value=row.Value, Overall=row.Overall))
    return result

# load all preprocessed training data
df = pd.read_csv('./features_filtrato.csv', sep=',')

# format data so it can be processed correctly by the libpgm functions
node_data = format_data(df)

skel = GraphSkeleton()
# load the structure of our net
#skel.load("./json_skel.txt")
# set the topological order
#skel.toporder()

# learner which will estimate parameters and, if needed, net structure
learner = PGMLearner()

# estimating parameters for our own model
#res = learner.discrete_mle_estimateparams(skel, node_data)

# estimating net structure given training data and parameters;
# this is an alternative to creating a new model on our data
net = learner.discrete_estimatebn(node_data)
print json.dumps(net.V, indent=2)
print json.dumps(net.E, indent=2)
res = learner.discrete_mle_estimateparams(net, node_data)
print(str(res))
def learnDiscreteBN(df, continous_columns, features_column_names, label_column='cat', draw_network=False):
    features_df = df.copy()
    features_df = features_df.drop(label_column, axis=1)

    labels_df = DataFrame()
    labels_df[label_column] = df[label_column].copy()

    for i in continous_columns:
        bins = np.arange((min(features_df[i])), (max(features_df[i])),
                         ((max(features_df[i]) - min(features_df[i])) / 5.0))
        features_df[i] = pandas.np.digitize(features_df[i], bins=bins)

    data = []
    for index, row in features_df.iterrows():
        dict = {}
        for i in features_column_names:
            dict[i] = row[i]
        dict[label_column] = labels_df[label_column][index]
        data.append(dict)

    print "Init done"

    learner = PGMLearner()
    test = learner.discrete_estimatebn(data=data, pvalparam=0.05, indegree=1)
    # print test.__dict__

    f = open('heart_structure.txt', 'w')
    s = str(test.__dict__)
    f.write(s)
    f.flush()
    f.close()

    print "done learning"

    edges = test.E
    vertices = test.V
    probas = test.Vdata
    # print probas

    dot_string = 'digraph BN{\n'
    dot_string += 'node[fontname="Arial"];\n'

    dataframes = {}
    print "save data"
    for vertice in vertices:
        print "New vertice: " + str(vertice)
        dataframe = DataFrame()
        pp = pprint.PrettyPrinter(indent=4)
        # pp.pprint(probas[vertice])
        dot_string += vertice.replace(" ", "_") + ' [label="' + vertice + '\n' + '" ]; \n'

        if len(probas[vertice]['parents']) == 0:
            dataframe['Outcome'] = None
            dataframe['Probability'] = None
            vertex_dict = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                vertex_dict[str(outcome)] = probas[vertice]["cprob"][index_outcome]
            od = collections.OrderedDict(sorted(vertex_dict.items()))
            # print "Vertice: " + str(vertice)
            # print "%-7s|%-11s" % ("Outcome", "Probability")
            # print "-------------------"
            for k, v in od.iteritems():
                # print "%-7s|%-11s" % (str(k), str(round(v, 3)))
                dataframe.loc[len(dataframe)] = [k, v]
            dataframes[vertice] = dataframe
        else:
            # pp.pprint(probas[vertice])
            dataframe['Outcome'] = None
            vertexen = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                temp = []
                for parent_index, parent in enumerate(probas[vertice]["parents"]):
                    # print str([str(float(index_outcome))])
                    temp = probas[vertice]["cprob"]
                    dataframe[parent] = None
                vertexen[str(outcome)] = temp
            dataframe['Probability'] = None
            od = collections.OrderedDict(sorted(vertexen.items()))
            # [str(float(i)) for i in ast.literal_eval(key)]
            # str(v[key][int(float(k))-1])
            # print "Vertice: " + str(vertice) + " with parents: " + str(probas[vertice]['parents'])
            # print "Outcome" + "\t\t" + '\t\t'.join(probas[vertice]['parents']) + "\t\tProbability"
            # print "------------" * len(probas[vertice]['parents']) * 3
            # pp.pprint(od.values())
            counter = 0
            # print number_of_cols
            for outcome, cprobs in od.iteritems():
                for key in cprobs.keys():
                    array_frame = []
                    array_frame.append((outcome))
                    print_string = str(outcome) + "\t\t"
                    for parent_value, parent in enumerate([i for i in ast.literal_eval(key)]):
                        # print "parent-value:" + str(parent_value)
                        # print "parent:" + str(parent)
                        array_frame.append(int(float(parent)))
                        # print "lengte array_frame: " + str(len(array_frame))
                        print_string += parent + "\t\t"
                    array_frame.append(cprobs[key][counter])
                    # print "lengte array_frame (2): " + str(len(array_frame))
                    # print cprobs[key][counter]
                    print_string += str(cprobs[key][counter]) + "\t"
                    # for stront in [str(round(float(i), 3)) for i in ast.literal_eval(key)]:
                    #     print_string += stront + "\t\t"
                    # print "print string: " + print_string
                    # print "array_frame:" + str(array_frame)
                    dataframe.loc[len(dataframe)] = array_frame
                counter += 1
            print "Vertice " + str(vertice) + " done"
            dataframes[vertice] = dataframe

    for edge in edges:
        dot_string += edge[0].replace(" ", "_") + ' -> ' + edge[1].replace(" ", "_") + ';\n'
    dot_string += '}'

    src = Source(dot_string)
    if draw_network: src.render('../data/BN', view=draw_network)
    if draw_network: src.render('../data/BN', view=False)
    print "visualization done"
    return dataframes
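A hedged usage sketch for learnDiscreteBN; the heart-disease style column names are hypothetical and must match the DataFrame passed in:

# hypothetical dataset with label column 'cat'
dataframes = learnDiscreteBN(df,
                             continous_columns=['age', 'chol'],
                             features_column_names=['age', 'chol', 'sex'],
                             label_column='cat',
                             draw_network=False)
print dataframes['cat']   # CPT of the label node as a DataFrame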
def bn_learn(attr, cicli, passed_file):
    path_to_sentiments = 'sentiment_AFINN'
    print "Using AFINN sentiment dictionary"

    if attr == 0:
        print "Considering tweets' number"
    elif attr == 1:
        print "Considering averaged number of positive, negative and neutral tweets"
    elif attr == 2:
        print "Considering averaged value of positive and negative tweets"
    elif attr == 3:
        print "Considering positive and negative tweets' increment"
    elif attr == 4:
        print "Considering bullisment index obtained by number of tweets sentiment"
    elif attr == 5:
        print "Considering bullisment index obtained by tweets value of sentiment"
    print "And considering market trend"

    all_data = []
    files = [path_to_sentiments + "/" + file
             for file in os.listdir(path_to_sentiments) if file.endswith('.json')]
    for file in files:
        with open(file) as sentiment_file:
            data = json.load(sentiment_file)
            vdata = {}
            if attr == 0:
                vdata["com"] = data["n_tweets"]
            elif attr == 1:
                vdata["pos"] = data["n_pos_ave"]
                vdata["neg"] = data["n_neg_ave"]
                vdata["neu"] = data["n_neu_ave"]
            elif attr == 2:
                vdata["pos"] = data["pos_val_ave"]
                vdata["neg"] = data["neg_val_ave"]
            elif attr == 3:
                vdata["pos"] = data["pos_inc"]
                vdata["neg"] = data["neg_inc"]
            elif attr == 4:
                vdata["com"] = data["bull_ind"]
            elif attr == 5:
                vdata["com"] = data["bull_ind_val"]
            vdata["market"] = data["market_inc"]
            all_data.append(vdata)

    skel = GraphSkeleton()
    if len(all_data[0]) == 2:
        skel.load("network_struct_1_vertex.json")
        print "Loading structure with 2 nodes"
    elif len(all_data[0]) == 3:
        skel.load("network_struct_2_vertex.json")
        print "Loading structure with 3 nodes"
    elif len(all_data[0]) == 4:
        skel.load("network_struct_3_vertex.json")
        print "Loading structure with 4 nodes"
    skel.toporder()

    learner = PGMLearner()
    result = learner.lg_mle_estimateparams(skel, all_data)
    for key in result.Vdata.keys():
        result.Vdata[key]['type'] = 'lg'

    prob_pos = prob_neg = prob_neu = 0
    for data in all_data:
        if data['market'] == 1:
            prob_pos += 1
        elif data['market'] == 0:
            prob_neu += 1
        else:
            prob_neg += 1
    prob_pos = float(prob_pos) / float(len(all_data))
    prob_neg = float(prob_neg) / float(len(all_data))
    prob_neu = float(prob_neu) / float(len(all_data))

    # replace the learned linear-Gaussian 'market' node with a discrete one
    tmp = {}
    tmp['numoutcomes'] = len(all_data)
    tmp['cprob'] = [prob_pos, prob_neg, prob_neu]
    tmp['parents'] = result.Vdata['market']['parents']
    tmp['vals'] = ['positive', 'negative', 'neutral']
    tmp['type'] = 'discrete'
    tmp['children'] = result.Vdata['market']['children']
    result.Vdata['market'] = tmp

    node = Discrete(result.Vdata["market"])
    print "Loading node as Discrete"

    estimated, real = mcmc_json(passed_file, attr, cicli, node)
    return estimated, real
for sample in featureVectorSamples:
    for vertex in vertices:
        if vertex not in sample.keys():
            sample[vertex] = vertexAverages[vertex]

# Testing just 4 vertices for now (takes a really, really long time to use all of them)
keysToRemove = list(vertices)[5:]
#keysToRemove.remove('HIV')
for sample in featureVectorSamples:
    for key in keysToRemove:
        del sample[key]

# instantiate learner
learner = PGMLearner()

# Voila, it makes us a bayesian network!
result = learner.lg_estimatebn(featureVectorSamples, pvalparam=0.10)

# output
print json.dumps(result.Vdata, indent=2)
print json.dumps(result.E, indent=2)

# For progress report: previous things we tried!
# Hackily removes all vertices with missing values, leaving just country name and year :P
# Instead, we should totally impute values using our linear classifier!
# commonVertices = vertices
# for sample in featureVectorSamples:
#     commonVertices2 = set([v for v in commonVertices])
class PGMTrainer(Trainer):
    def __init__(self, module, dataset=None):
        Trainer.__init__(self, module)
        #self.setData(dataset)
        self.ds = dataset
        self.learner = PGMLearner()

    def train(self):
        """Train the associated module for one epoch."""
        assert len(self.ds) > 0, "Dataset cannot be empty."
        gbds = []
        ds2 = []
        for seq in self.ds:
            for state_, action_, reward_ in seq:
                #sample = dict(theta=state_[0], thetaV=state_[1], s=state_[2], sV=state_[3], Action=action_[0], Reward=reward_[0])
                sample = dict(StateA=state_[0], StateB=state_[2], StateC=state_[1], StateD=state_[3], Action=action_[0], Reward=reward_[0])
                #print state_, action_, reward_
                #sample = dict(theta=state_[0], thetaPrime=state_[2], s=state_[1], sPrime=state_[3], Action=action_[0], Reward=reward_[0])
                if sample["Reward"] >= 0:
                    gbds.append(sample)
                if sample["Reward"] == -1:
                    ds2.append(sample)
                #print sample["Reward"]

        # sort samples for highest reward
        # bdss = sorted(gbds, key=lambda tup: tup["Reward"], reverse=True)
        #print "BDS: "
        #print json.dumps(gbds, indent=2)
        # print "BDSS: "
        # print json.dumps(bdss, indent=2)
        #tokeep = bdss[:max(2, len(bdss)/2)]
        #print bds

        # estimate parameters
        # print "data size: ", len(bds), len(gbds)
        N = 200
        if len(gbds) < N:
            l = N - len(gbds)
            n = len(ds2)
            t = len(ds2[n-l:])
            gbds.extend(ds2[n-l:])
            print "ds:, ", len(gbds), len(ds2)
            if len(gbds) < 100:
                # print "burn"
                self.module.burn = True
                return
            else:
                self.module.burn = False

        if len(gbds) < 5:
            # there was no rewarding action, so nothing to learn
            self.module.burn = True
            return

        N = 200
        if len(gbds) > N:
            # only take the newest N samples
            l = len(gbds)
            gbds = gbds[l-N:]
            # print "new effective set", len(gbds)

        skel = GraphSkeleton()
        # load network topology
        skel.load("net2.txt")
        # skel.load("workfile")
        skel.toporder()

        # estimate parameters
        self.module.net = self.learner.lg_mle_estimateparams(skel, gbds)
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("grades.txt")    # an input file
skel = GraphSkeleton()
skel.load("grades.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
data = bn.randomsample(80000)

# instantiate my learner
learner = PGMLearner()

# estimate structure
result = learner.discrete_constraint_estimatestruct(data)

# output
print json.dumps(result.E, indent=2)
def learn(self):
    print "ds: ", len(self.dataset)
    #print self.dataset
    data = []
    rw = []
    bestreward = -100
    for seq in self.dataset:
        for state_, action_, reward_ in seq:
            if reward_[0] > bestreward:
                bestreward = reward_[0]

    # find limit for theta
    print "bestrw", bestreward
    nds = []
    lt = []
    ls = []
    ltv = []
    lsv = []
    i = 0
    for seq in self.dataset:
        for state_, action_, reward_ in seq:
            # if reward_[0] == 0:
            #     print state_, action_, reward_
            #print state_, reward_
            if reward_[0] == bestreward:
                ns = (state_, action_[0], reward_[0])
                nds.append(ns)
                # print state_[0], state_[2], reward_[0], bestreward
                t = state_[0]
                tv = state_[1]
                s = state_[2]
                sv = state_[3]
                if t > 0.05:
                    print "hmmm,", i, t
                    #raise Exception(i)
                i += 1
                lt.append(t)
                ls.append(s)
                ltv.append(tv)
                lsv.append(sv)

    limits = dict(theta=[min(lt), max(lt)], s=[min(ls), max(ls)],
                  thetaV=[min(ltv), max(ltv)], sV=[min(lsv), max(lsv)])
    print "limits: ", limits
    # print "all good things:", nds

    # convert ds
    for seq in self.dataset:
        for state_, action_, reward_ in seq:
            # sample = dict(theta=state_[0], thetaPrime=state_[1], s=state_[2], sPrime=state_[3], Action=action_[0], Reward=reward_[0])
            #
            # dtpo = min(abs(sample["thetaPrime"] - limits["theta"][0]), abs(sample["thetaPrime"] - limits["theta"][1]))
            # dto = min(abs(sample["theta"] - limits["theta"][0]), abs(sample["theta"] - limits["theta"][1]))
            # dspo = min(abs(sample["sPrime"] - limits["s"][0]), abs(sample["sPrime"] - limits["s"][1]))
            # dso = min(abs(sample["s"] - limits["s"][0]), abs(sample["s"] - limits["s"][1]))
            #
            # #print dspo, dso
            #
            # netsample = dict(theta=sample["theta"], s=sample["s"], Action=sample["Action"], Reward=sample["Reward"])
            # # did this action improve theta or s??
            # if dtpo <= dto or dspo <= dso:  # yes it did
            #     data.append(netsample)
            #     rw.append(sample["Reward"])
            sample = dict(theta=state_[0], thetaV=state_[1], s=state_[2], sV=state_[3], Action=action_[0], Reward=reward_[0])
            #print state_, action_, reward_
            #print sample
            if sample["Reward"] != 990:
                data.append(sample)
            if numpy.random.random() >= 9.1:
                continue

    import matplotlib.pyplot as plt
    import pandas as pd
    df = pd.DataFrame(rw)
    # print df
    # plt.figure()
    # df[0].diff().hist()

    # instantiate my learner
    learner = PGMLearner()

    # estimate parameters
    rbn = []
    for i in range(0, 1):
        result = learner.lg_constraint_estimatestruct(data, bins=10, pvalparam=0.05)
        rbn.append(result)
        print len(result.E), result.E
    result = rbn[0]

    # output - toggle comment to see
    print json.dumps(result.V, indent=2)
    print len(result.E), "Edges", result.E

    import pydot
    # this time, in graph_type we specify we want a DIrected GRAPH
    graph = pydot.Dot(graph_type='digraph')
    nd = {}
    for n in result.V:
        nd[n] = pydot.Node(n)
        graph.add_node(nd[n])
    for e in result.E:
        graph.add_edge(pydot.Edge(nd[e[0]], nd[e[1]]))
    graph.write_png('eg.png')

    from IPython.display import Image
    Image('eg.png')

    f = open('workfile', 'w')
    f.write("{\n \"V\":")
    f.write(json.dumps(result.V))
    f.write(",\n \"E\":")
    f.write(json.dumps(result.E))
    f.write("}")
    f.close()

    skel = GraphSkeleton()
    skel.load("workfile")
    # topologically order graphskeleton
    skel.toporder()
    return
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("bayes_structure.txt")    # an input file
skel = GraphSkeleton()
skel.load("bayes_structure.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
data = bn.randomsample(200)

# instantiate my learner
learner = PGMLearner()

# estimate parameters from data and skeleton
result = learner.discrete_mle_estimateparams(skel, data)

# output
print json.dumps(result.Vdata, indent=2)
    return result

# load all preprocessed training data
df = pd.read_csv('features.csv', sep=',')

# format data so it can be processed correctly by the libpgm functions
node_data = format_data(df)

skel = GraphSkeleton()
# load the structure of our net
skel.load("./skel-learned2.txt")
# set the topological order
skel.toporder()

# learner which will estimate parameters and, if needed, net structure
learner = PGMLearner()

# estimating parameters for our own model
res = learner.discrete_mle_estimateparams(skel, node_data)

"""
# estimating net structure given training data and parameters;
# this is an alternative to creating a new model on our data
net = learner.discrete_estimatebn(node_data)
with open("reteTestMeta.csv", "a") as gv:
    gv.write(json.dumps(net.V, indent=2))
    gv.write(json.dumps(net.E, indent=2))
res = learner.discrete_mle_estimateparams(net, node_data)
with open("modelloMeta.csv", "a") as gv:
    gv.write(json.dumps(res.E, indent=2))
    gv.write(json.dumps(res.Vdata, indent=2))
"""
import json
import string

from libpgm.graphskeleton import GraphSkeleton
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)

# Rating 1 given genre is Drama
myquery = dict(rating=[1])
myevidence = dict(genre='Drama')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result
tcf.refresh()

# Rating 2 given genre is Drama
__author__ = 'Amir'

import json
from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

with open('data.txt', 'r') as f:
    data = eval(f.read())

# generate some data to use
skel = GraphSkeleton()
skel.load("skel.txt")
skel.toporder()

# instantiate my learner
learner = PGMLearner()

# estimate parameters from data and skeleton
result = learner.lg_mle_estimateparams(skel, data)

# output
print json.dumps(result.Vdata, indent=2)
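For reference, lg_mle_estimateparams expects the data as a list of dicts, one sample per dict, mapping each node name to a numeric value; so data.txt should contain a Python literal along these lines (node names hypothetical):

# contents of data.txt (sketch)
[{"X": 0.3, "Y": 1.2},
 {"X": -0.1, "Y": 0.8}]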
import json
import string

from libpgm.graphskeleton import GraphSkeleton
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)
#listofDicts = json.dumps(data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)

# Rating 1 given occupation is student
myquery = dict(rating=[1])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result
tcf.refresh()

# Rating 2 given occupation is student
myquery = dict(rating=[2])
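The snippet is cut off mid-query; mirroring the first query, the continuation is presumably:

myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result
tcf.refresh()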
def learnDiscreteBN_with_structure(df, continous_columns, features_column_names, label_column='cat', draw_network=False):
    features_df = df.copy()
    features_df = features_df.drop(label_column, axis=1)

    labels_df = DataFrame()
    labels_df[label_column] = df[label_column].copy()

    for i in continous_columns:
        bins = np.arange((min(features_df[i])), (max(features_df[i])),
                         ((max(features_df[i]) - min(features_df[i])) / 5.0))
        features_df[i] = pandas.np.digitize(features_df[i], bins=bins)

    data = []
    for index, row in features_df.iterrows():
        dict = {}
        for i in features_column_names:
            dict[i] = row[i]
        dict[label_column] = labels_df[label_column][index]
        data.append(dict)

    print "Init done"

    learner = PGMLearner()

    # hand-built star-shaped skeleton: every feature is a parent of the label
    graph = GraphSkeleton()
    graph.V = []
    graph.E = []
    graph.V.append(label_column)
    for vertice in features_column_names:
        graph.V.append(vertice)
        graph.E.append([vertice, label_column])

    test = learner.discrete_mle_estimateparams(graphskeleton=graph, data=data)

    print "done learning"

    edges = test.E
    vertices = test.V
    probas = test.Vdata
    # print probas

    dot_string = 'digraph BN{\n'
    dot_string += 'node[fontname="Arial"];\n'

    dataframes = {}
    print "save data"
    for vertice in vertices:
        print "New vertice: " + str(vertice)
        dataframe = DataFrame()
        pp = pprint.PrettyPrinter(indent=4)
        # pp.pprint(probas[vertice])
        dot_string += vertice.replace(" ", "_") + ' [label="' + vertice + '\n' + '" ]; \n'

        if len(probas[vertice]['parents']) == 0:
            dataframe['Outcome'] = None
            dataframe['Probability'] = None
            vertex_dict = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                vertex_dict[str(outcome)] = probas[vertice]["cprob"][index_outcome]
            od = collections.OrderedDict(sorted(vertex_dict.items()))
            # print "Vertice: " + str(vertice)
            # print "%-7s|%-11s" % ("Outcome", "Probability")
            # print "-------------------"
            for k, v in od.iteritems():
                # print "%-7s|%-11s" % (str(k), str(round(v, 3)))
                dataframe.loc[len(dataframe)] = [k, v]
            dataframes[vertice] = dataframe
        else:
            # pp.pprint(probas[vertice])
            dataframe['Outcome'] = None
            vertexen = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                temp = []
                for parent_index, parent in enumerate(probas[vertice]["parents"]):
                    # print str([str(float(index_outcome))])
                    temp = probas[vertice]["cprob"]
                    dataframe[parent] = None
                vertexen[str(outcome)] = temp
            dataframe['Probability'] = None
            od = collections.OrderedDict(sorted(vertexen.items()))
            # [str(float(i)) for i in ast.literal_eval(key)]
            # str(v[key][int(float(k))-1])
            # print "Vertice: " + str(vertice) + " with parents: " + str(probas[vertice]['parents'])
            # print "Outcome" + "\t\t" + '\t\t'.join(probas[vertice]['parents']) + "\t\tProbability"
            # print "------------" * len(probas[vertice]['parents']) * 3
            # pp.pprint(od.values())
            counter = 0
            # print number_of_cols
            for outcome, cprobs in od.iteritems():
                for key in cprobs.keys():
                    array_frame = []
                    array_frame.append((outcome))
                    print_string = str(outcome) + "\t\t"
                    for parent_value, parent in enumerate([i for i in ast.literal_eval(key)]):
                        # print "parent-value:" + str(parent_value)
                        # print "parent:" + str(parent)
                        array_frame.append(int(float(parent)))
                        # print "lengte array_frame: " + str(len(array_frame))
                        print_string += parent + "\t\t"
                    array_frame.append(cprobs[key][counter])
                    # print "lengte array_frame (2): " + str(len(array_frame))
                    # print cprobs[key][counter]
                    print_string += str(cprobs[key][counter]) + "\t"
                    # for stront in [str(round(float(i), 3)) for i in ast.literal_eval(key)]:
                    #     print_string += stront + "\t\t"
                    # print "print string: " + print_string
                    # print "array_frame:" + str(array_frame)
                    dataframe.loc[len(dataframe)] = array_frame
                counter += 1
            print "Vertice " + str(vertice) + " done"
            dataframes[vertice] = dataframe

    for edge in edges:
        dot_string += edge[0].replace(" ", "_") + ' -> ' + edge[1].replace(" ", "_") + ';\n'
    dot_string += '}'

    # src = Source(dot_string)
    # src.render('../data/BN', view=draw_network)
    # src.render('../data/BN', view=False)
    print "visualization done"
    return dataframes