Example #1
def estimate_distrib(skel, samples, query, evidence):
    learner = PGMLearner()
    bayesnet = learner.discrete_mle_estimateparams(skel, samples)
    tablecpd = TableCPDFactorization(bayesnet)
    fac = tablecpd.condprobve(query, evidence)
    df2 = printdist(fac, bayesnet)
    return df2
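A possible invocation of estimate_distrib (a sketch: bn and skel would be an existing DiscreteBayesianNetwork and its GraphSkeleton from the surrounding project, and the node/value names here are assumptions):

# sketch: condition the learned network on evidence and read off a distribution
samples = bn.randomsample(1000)          # training records as a list of dicts
query = dict(Grades='A')                 # hypothetical node/value names
evidence = dict(Interview='good')
df2 = estimate_distrib(skel, samples, query, evidence)
print df2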
def bayesNetCont(textFile,unique):
	cleanText(textFile,'tempOutput.txt')

	## imports textFile into pandas
	try:
		df = pd.read_csv('tempOutput.txt', sep='\s+',dtype='float64',header=None)
	except:
		print 'next file'
		return
	df.fillna(0, inplace=True)
	df.convert_objects(convert_numeric=True)

	## set to either setUnique() or setMax()
	if unique is True:
		grouped = setUnique(df)
	else:
		grouped = setMax(df)

	
	#turns into correct dictionary format for libpgm
	newDict = DFtoLibpgm(grouped)

# instantiate my learner 
	learner = PGMLearner()

# estimate structure
	#gaussian
	try:
		result = learner.lg_constraint_estimatestruct(newDict)
	except:
		print 'error'
		return
		
# output
	return result
def learn_net(data):
    '''learns Bayes net on raw data'''
    data_dict = data.to_dict('records')
    learner = PGMLearner()
    skel = learner.discrete_constraint_estimatestruct(data=data_dict,indegree=1)
    skel.toporder()
    disc_bayes_net = learner.discrete_mle_estimateparams(graphskeleton=skel,data=data_dict)
    return disc_bayes_net
def learn_net_discretize(data, vars_to_discretize, n_bins):
    '''learn Bayes net after selected variables have been discretized'''
    data_subset, bins = discretize(data, vars_to_discretize, n_bins=n_bins)
    data_dict = data_subset.to_dict('records')
    learner = PGMLearner()
    skel = learner.discrete_constraint_estimatestruct(data=data_dict,indegree=1)
    skel.toporder()
    disc_bayes_net = learner.discrete_mle_estimateparams(graphskeleton=skel,data=data_dict)
    return disc_bayes_net, bins
Example #5
def anomaly_libpgm():
    files = glob.glob(join('data', '*.txt'))
    for file in files[0:1]:
        print file
        data=read_data_libpgm(file)
        learner = PGMLearner()
        
        result=learner.lg_estimatebn(data, indegree=3)
        
        print result.E
Example #6
def test_libpgm(df1):

    data = df1.T.to_dict().values()
    #pprint(data)
    skel = GraphSkeleton()
    skel.load("bn_struct.txt")
    
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, data)
    
    print json.dumps(result.Vdata, indent=2)
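GraphSkeleton.load expects a JSON-style dictionary with a vertex list "V" and an edge list "E"; a minimal sketch of what a file like bn_struct.txt might contain (node names are assumptions):

{
    "V": ["A", "B", "C"],
    "E": [["A", "B"],
          ["B", "C"]]
}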
Example #7
def learnBN(fdata_array, bn_file):

    bn_path = os.path.join(experiment_dir, 'parameters', bn_file + '.txt')

    skel = GraphSkeleton()
    skel.load(bn_path)
    skel.toporder()

    learner = PGMLearner()
    bn = learner.discrete_mle_estimateparams(skel, fdata_array)

    return bn
def getBNparams(graph, ddata, n):
    # Gets Disc. BN parameters given a graph skeleton
    #skeleton should include t-1 and t nodes for each variable
    nodes = range(1, (n * 2) + 1)
    nodes = map(str, nodes)
    edges = gk.edgelist(graph)
    for i in range(len(edges)):
        edges[i] = list([edges[i][0], str(n + int(edges[i][1]))])
    skel = GraphSkeleton()
    skel.V = nodes
    skel.E = edges
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, ddata)
    return result
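getBNparams builds its GraphSkeleton programmatically instead of loading it from a file; a self-contained sketch of the same idea on a toy two-node problem (all names and data are assumptions):

from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

skel = GraphSkeleton()
skel.V = ['1', '2']          # node "1" at t-1, node "2" at t
skel.E = [['1', '2']]
skel.toporder()

# toy records: each sample assigns a value to every node
data = [{'1': 'hi', '2': 'lo'}] * 50 + [{'1': 'lo', '2': 'hi'}] * 50
learner = PGMLearner()
bn = learner.discrete_mle_estimateparams(skel, data)
print bn.Vdata['2']['cprob']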
Example #9
    def __init__(self):
        self.learner = PGMLearner()
        rospy.Service("~discrete/parameter_estimation",
                      DiscreteParameterEstimation,
                      self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation",
                      DiscreteStructureEstimation,
                      self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation",
                      LinearGaussianParameterEstimation,
                      self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation",
                      LinearGaussianStructureEstimation,
                      self.lg_structure_estimation_cb)
def createData():
    nd = NodeData()
    skel = GraphSkeleton()
    fpath = "job_interview.txt"
    nd.load(fpath)
    skel.load(fpath)
    skel.toporder()
    bn = DiscreteBayesianNetwork(skel, nd)

    learner = PGMLearner()
    data = bn.randomsample(1000)
    X, Y = 'Grades', 'Offer'
    c, p, w = learner.discrete_condind(data, X, Y, ['Interview'])
    print "chi-square between X and Y: ", c, " p-value ", p, " witness node: ", w
    result = learner.discrete_constraint_estimatestruct(data)
    print result.E
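A possible follow-up to the discrete_condind call (a sketch, reusing learner and data from createData and the conventional 0.05 threshold):

# sketch: threshold the p-value to decide conditional independence
c, p, w = learner.discrete_condind(data, 'Grades', 'Offer', ['Interview'])
if p > 0.05:
    print "Grades and Offer appear conditionally independent given Interview"
else:
    print "conditional independence rejected at the 5% level"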
Example #12
def em(data, bn, skel):
    lk_last = 100
    times = 0
    while 1:
        d2 = data_with_hidden(data, bn)
        learner = PGMLearner()  #toolbox
        bn = learner.discrete_mle_estimateparams(skel, d2)  #toolbox
        lk = likelihood(d2, bn)
        print "LogLikelihood:", lk
        times += 1

        if abs((lk - lk_last) / lk_last) < 0.001:
            break
        lk_last = lk
    print times
    return bn
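A possible driver for em (a sketch: data_with_hidden and likelihood are helpers from the same source file, and the file name, initial CPDs, and observed_data are assumptions):

# sketch: EM needs an initial network; here one loaded from a node-data file
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork

nd = NodeData()
nd.load("net_with_hidden.txt")       # hypothetical file with starting CPDs
skel = GraphSkeleton()
skel.load("net_with_hidden.txt")
skel.toporder()
bn0 = DiscreteBayesianNetwork(skel, nd)
bn = em(observed_data, bn0, skel)    # observed_data: list-of-dicts records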
Example #13
def buildBN(trainingData, binstyleDict, numbinsDict,
            **kwargs):  # need to modify to accept skel or skelfile

    discretized_training_data, bin_ranges = discretizeTrainingData(
        trainingData, binstyleDict, numbinsDict, True)
    print 'discrete training ', discretized_training_data

    if 'skel' in kwargs:
        # load file into skeleton
        if isinstance(kwargs['skel'], basestring):
            skel = GraphSkeleton()
            skel.load(kwargs['skel'])
            skel.toporder()
        else:
            skel = kwargs['skel']

    # learn bayesian network
    learner = PGMLearner()
    # baynet = learner.discrete_mle_estimateparams(skel, discretized_training_data)
    # baynet = discrete_estimatebn(learner, discretized_training_data, skel, 0.05, 1)
    # discrete_mle_estimateparams2 is defined as a function in this file, not imported from libpgm
    baynet = discrete_mle_estimateparams2(skel, discretized_training_data)

    return baynet
Example #15
def net2():
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load("net.txt")  # an input file
    skel.load("net.txt")

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    lgbn = LGBayesianNetwork(skel, nd)

    in_data = read_data.getdata2()
    learner = PGMLearner()
    bn = learner.lg_mle_estimateparams(skel, in_data)

    p = cal_prob(in_data[300:500], bn)
    print p
    return 0
Example #16
def bayesNet(textFile):
    cleanText(textFile, 'tempOutput.txt')

    ## imports textFile into pandas

    try:
        df = pd.read_csv('tempOutput.txt',
                         sep='\s+',
                         dtype='float32',
                         header=None)
    except:
        print 'next file'
        return
    df.fillna(0, inplace=True)
    df = df.convert_objects(convert_numeric=True)

    ##

    for i, row in df.iterrows():
        print df.ix[0, i]
        df.ix[0, i] = df.ix[0, i] + str(i)

    grouped = df.set_index([0], verify_integrity=True)

    df2 = grouped.to_dict()

    print json.dumps(df2, indent=2)

    newDict = []

    for key in df2.keys():
        newDict.append(df2[key])

    #print json.dumps(newDict, indent=2)
    # instantiate my learner
    learner = PGMLearner()

    # estimate structure
    result = learner.lg_constraint_estimatestruct(newDict)

    # output
    return json.dumps(result.E, indent=2)
Example #17
    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel
Example #18
def calc_accuracy(dff_train, dff_train_target, nb_iterations):
    
    result = np.zeros(nb_iterations)

    for itera in range(nb_iterations):
        XX_train, XX_test, yy_train, yy_test = train_test_split(dff_train, dff_train_target, test_size=0.33)
        data4bn = format_data(XX_train)
        learner = PGMLearner()
        # estimate parameters (skel is assumed to be a graph skeleton defined
        # at module level in the source)
        result_bn = learner.discrete_mle_estimateparams(skel, data4bn)
        #result_bn.Vdata
        result_predict = calc_BNprob(XX_test)
        BN_test_probs = pd.DataFrame()
        BN_test_probs['ground_truth'] = yy_test
        Test_prob = pd.concat([yy_test.reset_index().Surv, result_predict],
                              axis=1, ignore_index=True).rename(
                                  columns={0: 'ground_truth', 1: 'class_resu'})
        accuracy = Test_prob[Test_prob.ground_truth == Test_prob.class_resu].shape[0] / (1.0 * Test_prob.shape[0])
        #print("Accuracy is {}".format(accuracy))
        result[itera] = accuracy
        
    return result
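A possible call (a sketch; dff_train and dff_train_target are assumed to be the training DataFrame and its label series, matching the Titanic-style records shown in Example #31):

# sketch: average the accuracy over 10 resampled train/test splits
accuracies = calc_accuracy(dff_train, dff_train_target, 10)
print accuracies.mean()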
Example #19
def bayesNetDiscrete(textFile, quant_no, unique):
    cleanText(textFile, 'tempOutput.txt')

    ## imports textFile into pandas
    try:
        df = pd.read_csv('tempOutput.txt',
                         sep='\s+',
                         dtype='float64',
                         header=None)
    except:
        print 'next file'
        return
    df.fillna(0, inplace=True)
    df = df.convert_objects(convert_numeric=True)

    ## set to either setUnique() or setMax()
    if unique is True:
        grouped = setUnique(df)
    else:
        grouped = setMax(df)

    ## quantiles use qcut(); fixed-width divisions use cut()
    grouped = quantize(quant_no, grouped)

    #turns into correct dictionary format for libpgm
    newDict = DFtoLibpgm(grouped)

    # instantiate my learner
    learner = PGMLearner()

    # estimate structure
    try:
        result = learner.discrete_estimatebn(newDict)
    except:
        print 'error'
        #result = learner.discrete_estimatebn([dict([('a',1),('b',2)])])
        return


    # output
    return result
Example #22
def bayesNetCont(textFile, unique):
    cleanText(textFile, 'tempOutput.txt')

    ## imports textFile into pandas
    try:
        df = pd.read_csv('tempOutput.txt',
                         sep='\s+',
                         dtype='float64',
                         header=None)
    except:
        print 'next file'
        return
    df.fillna(0, inplace=True)
    df = df.convert_objects(convert_numeric=True)

    ## set to either setUnique() or setMax()
    if unique is True:
        grouped = setUnique(df)
    else:
        grouped = setMax(df)

    #turns into correct dictionary format for libpgm
    newDict = DFtoLibpgm(grouped)

    # instantiate my learner
    learner = PGMLearner()

    # estimate structure
    #gaussian
    try:
        result = learner.lg_constraint_estimatestruct(newDict)
    except:
        print 'error'
        return

    # output
    return result
Example #23
def main():
    # filename
    features_file = './../data/features.csv'

    # read data into list
    handwriting_features = postmaster.readCSVIntoListAsDict(features_file)

    # learn structure
    # instantiate learner
    learner = PGMLearner()

    pvalue = 0.25
    indegree = 1
    # estimate structure
    #result = learner.discrete_constraint_estimatestruct(
    #	handwriting_features, pvalue, indegree)
    result = learner.discrete_estimatebn(handwriting_features)

    #result = learner.discrete_condind(handwriting_features, 'f1', 'f2',
    #	['f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9'])
    # output
    #print result.chi, result.pval, result.U
    #print json.dumps(result.E, indent=2)
    print json.dumps(result.Vdata, indent=2)
Example #25
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("gaussGrades.txt")  # an input file
skel = GraphSkeleton()
skel.load("gaussGrades.txt")
skel.toporder()
lgbn = LGBayesianNetwork(skel, nd)
data = lgbn.randomsample(8000)

print data

# instantiate my learner
learner = PGMLearner()

# estimate structure
result = learner.lg_constraint_estimatestruct(data)

# output
print json.dumps(result.E, indent=2)
'''
@author: himanshu
'''
import json
from networkx import DiGraph, draw
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner
import matplotlib.pyplot as plt
from data_extractor import DataExtractor


#  generate some data to use
data_ext = DataExtractor('genome', format = 'json')
data = data_ext.get_data_vectors()
print 'Got data with ', len(data), ' vectors'
#  instantiate my learner
learner = PGMLearner()

print 'learning the structure'
#  estimate structure
result = learner.discrete_constraint_estimatestruct(data, pvalparam = 0.02)

#  output
print json.dumps(result.E, indent = 2)
graph = DiGraph()
graph.add_edges_from(result.E)
draw(graph)
plt.show()
# vertices = set(mainFeatures)
# for i, sample in enumerate(training_arr):
#   newSample = {}
#   newSample['HIV'] = hiv_training_arr[i]
#   for k in sample.keys():
#     if k in vertices:
#       newSample[k] = sample[k]
#   condensed_feature_vectors.append(newSample)
################################################

# import pprint
# pp = pprint.PrettyPrinter(indent=4)
# pp.pprint(condensed_feature_vectors)

# instantiate learner 
learner = PGMLearner()

# Voila, it makes us a bayesian network!
bayesian_networks_by_region = {}
for region in condensed_feature_vectors_by_region:
  bayesian_networks_by_region[region] = learner.lg_estimatebn(condensed_feature_vectors_by_region[region])
  print region
  print json.dumps(bayesian_networks_by_region[region].Vdata, indent=2)
  print json.dumps(bayesian_networks_by_region[region].E, indent=2)

#Evaluation:
predictions = []
test_arrs_by_region = {}
hiv_test_arrs_by_region = {}
for i, sample in enumerate(test_arr):
  region = getRegion(sample['Country'])
Example #28
# output - toggle comment to see
#print json.dumps(result, indent=2)

# (8) --------------------------------------------------------------------------
# Learn the CPDs of a discrete Bayesian network, given data and a structure:

# say I have some data
data = bn.randomsample(200)

# and a graphskeleton
skel = GraphSkeleton()
skel.load("../tests/unittestdict.txt")

# instantiate my learner
learner = PGMLearner()

# estimate parameters
result = learner.discrete_mle_estimateparams(skel, data)

# output - toggle comment to see
#print json.dumps(result.Vdata, indent=2)

# (9) -------------------------------------------------------------------------
# Learn the structure of a discrete Bayesian network, given only data:

# say I have some data
data = bn.randomsample(2000)

# instantiate my learner
learner = PGMLearner()
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

nd = NodeData()
nd.load("nodedata.json")
skel = GraphSkeleton()
skel.load("nodedata.json")
skel.toporder()

bn = DiscreteBayesianNetwork(skel,nd)
with open("manipulatedata.json") as fp:
    data = json.load(fp)

learner = PGMLearner()

# result = learner.discrete_constraint_estimatestruct(data)
result = learner.discrete_estimatebn(data)

print json.dumps(result.E, indent=2)
print json.dumps(result.Vdata, indent=2)
Example #30
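# (truncated in the source: the head of this elif chain over di['HandStrength'] is missing)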
              or di['HandStrength'] == '3ofakind'):
            di['HandStrength'] = 'Medium'
        elif (di['HandStrength'] == 'TwoPairs'
              or (di['HandStrength'] == 'OnePair' and
                  (di['Rank'] == 'A' or di['Rank'] == 'K' or di['Rank'] == 'Q'
                   or di['Rank'] == 'J'))):
            di['HandStrength'] = 'Weak'
        else:
            di['HandStrength'] = 'VeryWeak'
print('###################    PART A    ################################')
print('*************** Network Parameters for BN agent1*****************')
for skeleton in ["Poker_Network.txt"
                 ]:  # loading skeleton of Network from given-file
    skel = GraphSkeleton()
    skel.load(skeleton)
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, mdata)
    print json.dumps(result.Vdata, indent=2)

print('*************** Network Parameters for BN agent2*****************')
for skeleton in ["Poker_Network.txt"]:
    skel = GraphSkeleton()
    skel.load(skeleton)
    learner = PGMLearner()
    result = learner.discrete_mle_estimateparams(skel, mdata2)
    print json.dumps(result.Vdata, indent=2)

print('########################## PART B    ################################')
print('**************  NB1 parameters for agent 1  *************************')
for skeleton in ["Poker_Network1.txt"]:
    skel = GraphSkeleton()
Example #31
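# example of the record format used below (list truncated in the source):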
[{'Class': 3, 'Fare': 0, 'Sex': 1, 'Surv': 0},
 {'Class': 1, 'Fare': 1, 'Sex': 0, 'Surv': 1},
 {'Class': 3, 'Fare': 0, 'Sex': 0, 'Surv': 1},
 {'Class': 1, 'Fare': 1, 'Sex': 0, 'Surv': 1},...]
# In[ ]:


nd       = NodeData()
skel     = GraphSkeleton()

#The structure is defined in the file titanic_skel
jsonpath ="titanic_skel.json"
skel.load(jsonpath)

#instantiate the learner
learner = PGMLearner()

# The method estimates the parameters for a discrete Bayesian network with
# a structure given by graphskeleton, in order to maximize the probability
# of the observed data
result_params = learner.discrete_mle_estimateparams(skel, training_data)

result_params.Vdata['Class']  # to inspect the network


# Check the prediction accuracy

# In[ ]:


#results = calc_accuracy(dff_train, dff_train_target, 100)
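The learned Vdata can be inspected per node (a sketch; the key names follow libpgm's discrete node format checked by the unit tests elsewhere on this page):

# sketch: inspect the learned CPD of one node
vd = result_params.Vdata['Class']
print vd['vals']       # list of possible outcomes
print vd['parents']    # parent node names (None for a root node)
print vd['cprob']      # probabilities, keyed by parent assignment for non-roots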
Example #32
class TestPGMLearner(unittest.TestCase):
    
    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1 and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15 and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        self.assertTrue(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])  
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
Example #33
def fun(inputData):

    #Defining formatting data method
    def format_data(df):
        result = []
        for row in df.itertuples():
            #print(row.Pclass)
            result.append(
                dict(great=row.great,
                     good=row.good,
                     clean=row.clean,
                     comfortable=row.comfortable,
                     bad=row.bad,
                     old=row.old,
                     Cleanliness=row.Cleanliness,
                     Location=row.Location,
                     Service=row.Service,
                     Rooms=row.Rooms,
                     Value=row.Value,
                     Overall=row.Overall))
        return result

    #load all preprocessed training data
    df = pd.read_csv('features.csv', sep=',')

    #format data to let them correctly processed by libpgm functions
    node_data = format_data(df)

    skel = GraphSkeleton()
    #load structure of our net
    skel.load("./our-skel.txt")
    #setting the topological order
    skel.toporder()
    #learner which will estimate parameters and, if needed, the net structure
    learner = PGMLearner()

    #estimating parameters for our own model
    res = learner.discrete_mle_estimateparams(skel, node_data)

    # get CPT
    a = TableCPDFactorization(res)
    #compute the query and evidences as dicts
    query = dict(Overall=1)
    # prepare dictionary of values (after the equals signs, put the values read from the GUI)

    evidence = dict(Value=inputData[0],
                    Location=inputData[1],
                    Cleanliness=inputData[2],
                    Service=inputData[3],
                    Rooms=inputData[4],
                    bad=inputData[5],
                    old=inputData[6],
                    good=inputData[7],
                    great=inputData[8],
                    comfortable=inputData[9],
                    clean=inputData[10])

    print(query)
    print(evidence)

    #run the query given evidence
    result = a.condprobve(query, evidence)

    print json.dumps(result.vals, indent=2)
    #res.Vdata["Overall"]["vals"][pos]
    #arr=[]
    dizionario = {}
    for i in range(1, 6):
        dizionario[res.Vdata["Overall"]["vals"][i - 1]] = result.vals[i - 1]
    #    arr.append(dizionario)
    #print(str(arr))
    return dizionario
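A possible call of fun (a sketch; the eleven values are hypothetical and follow the evidence order hard-coded above):

# sketch: distribution over Overall ratings given GUI-style evidence
inputData = [3, 4, 5, 4, 4, 0, 0, 1, 1, 1, 1]
distribution = fun(inputData)
print distribution     # maps each Overall value to its probability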
Example #34
class PGMLearnerServer(object):
    def __init__(self):
        self.learner = PGMLearner()
        rospy.Service("~discrete/parameter_estimation",
                      DiscreteParameterEstimation,
                      self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation",
                      DiscreteStructureEstimation,
                      self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation",
                      LinearGaussianParameterEstimation,
                      self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation",
                      LinearGaussianStructureEstimation,
                      self.lg_structure_estimation_cb)

    def discrete_parameter_estimation_cb(self, req):
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.discrete_mle_estimateparams(skel, data)
        return DiscreteParameterEstimationResponse(
            U.discrete_nodes_to_ros(res.Vdata))

    def discrete_query_cb(self, req):
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}

        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res

    def discrete_structure_estimation_cb(self, req):
        states = [{ns.node: ns.state
                   for ns in s.node_states} for s in req.states]
        pvalparam = 0.05  # default value
        indegree = 1  # default value
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.indegree != 0:
            indegree = req.indegree
        res = self.learner.discrete_constraint_estimatestruct(
            states, pvalparam=pvalparam, indegree=indegree)
        return DiscreteStructureEstimationResponse(
            U.graph_skeleton_to_ros(res))

    def lg_parameter_estimation_cb(self, req):
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.lg_mle_estimateparams(skel, data)
        rospy.logdebug("parameter estimation: %s" % res.Vdata)
        return LinearGaussianParameterEstimationResponse(
            U.linear_gaussian_nodes_to_ros(res.Vdata))

    def lg_structure_estimation_cb(self, req):
        states = [{ns.node: ns.state
                   for ns in s.node_states} for s in req.states]
        rospy.logdebug(states)
        pvalparam = 0.05  # default value
        bins = 10  # default value
        indegree = 1  # default value
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.bins != 0:
            bins = req.bins
        if req.indegree != 0:
            indegree = req.indegree
        rospy.logdebug("bins: %d, pvalparam: %f, indegree: %d" %
                       (bins, pvalparam, indegree))
        res = self.learner.lg_constraint_estimatestruct(states,
                                                        pvalparam=pvalparam,
                                                        bins=bins,
                                                        indegree=indegree)
        rospy.logdebug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        rospy.logdebug(res.V)
        rospy.logdebug(res.E)
        return LinearGaussianStructureEstimationResponse(
            U.graph_skeleton_to_ros(res))
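A minimal way to stand this server up (a sketch; the node name is an assumption):

if __name__ == '__main__':
    rospy.init_node('pgm_learner')
    PGMLearnerServer()
    rospy.spin()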
Example #35
    def __init__(self, module, dataset=None):

        Trainer.__init__(self, module)
        #self.setData(dataset)
        self.ds = dataset
        self.learner = PGMLearner()
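# (snippet truncated in the source: only the tail of a format_data helper,
# like the one in Example #33, survives below)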
                 Value=row.Value,
                 Overall=row.Overall))
        #      result.append(dict(great = row.great, good = row.good, nice = row.nice, clean = row.clean, helpful = row.helpful, comfortable = row.comfortable,
    # beautiful = row.beautiful, wonderful = row.wonderful, friendly = row.friendly, fantastic = row.fantastic, bad = row.bad,
    #   Cleanliness= row.Cleanliness, Location=row.Location ,Businessservice=row.Businessservice,
    #    Checkin=row.Checkin, Service=row.Service, Rooms=row.Rooms, Value=row.Value, Overall=row.Overall ))
    return result


#load all preprocessed training data
df = pd.read_csv('./features_filtrato.csv', sep=',')
#format data to let them correctly processed by libpgm functions
node_data = format_data(df)

skel = GraphSkeleton()
#load structure of our net
#skel.load("./json_skel.txt")
#setting the topological order
#skel.toporder()
#learner which will estimate parameters and, if needed, the net structure
learner = PGMLearner()

#estimating parameters for our own model
#res = learner.discrete_mle_estimateparams(skel, node_data)

#estimating the net structure given training data and parameters; this is an alternative way to create a new model from our data
net = learner.discrete_estimatebn(node_data)
print json.dumps(net.V, indent=2)
print json.dumps(net.E, indent=2)
res = learner.discrete_mle_estimateparams(net, node_data)
print(str(res))
def learnDiscreteBN(df, continous_columns, features_column_names, label_column='cat', draw_network=False):
    features_df = df.copy()
    features_df = features_df.drop(label_column, axis=1)

    labels_df = DataFrame()
    labels_df[label_column] = df[label_column].copy()

    for i in continous_columns:
        bins = np.arange((min(features_df[i])), (max(features_df[i])),
                         ((max(features_df[i]) - min(features_df[i])) / 5.0))
        features_df[i] = pandas.np.digitize(features_df[i], bins=bins)

    data = []
    for index, row in features_df.iterrows():
        dict = {}
        for i in features_column_names:
            dict[i] = row[i]
        dict[label_column] = labels_df[label_column][index]
        data.append(dict)

    print "Init done"
    learner = PGMLearner()

    test = learner.discrete_estimatebn(data=data, pvalparam=0.05, indegree=1)

    # print test.__dict__

    f = open('heart_structure.txt', 'w')
    s = str(test.__dict__)
    f.write(s)
    f.flush()
    f.close()

    print "done learning"
    edges = test.E
    vertices = test.V
    probas = test.Vdata

    # print probas

    dot_string = 'digraph BN{\n'
    dot_string += 'node[fontname="Arial"];\n'

    dataframes = {}

    print "save data"
    for vertice in vertices:
        print "New vertice: " + str(vertice)
        dataframe = DataFrame()

        pp = pprint.PrettyPrinter(indent=4)
        # pp.pprint(probas[vertice])
        dot_string += vertice.replace(" ", "_") + ' [label="' + vertice + '\n' + '" ]; \n'

        if len(probas[vertice]['parents']) == 0:
            dataframe['Outcome'] = None
            dataframe['Probability'] = None
            vertex_dict = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                vertex_dict[str(outcome)] = probas[vertice]["cprob"][index_outcome]

            od = collections.OrderedDict(sorted(vertex_dict.items()))
            # print "Vertice: " + str(vertice)
            # print "%-7s|%-11s" % ("Outcome", "Probability")
            # print "-------------------"
            for k, v in od.iteritems():
                # print "%-7s|%-11s" % (str(k), str(round(v, 3)))
                dataframe.loc[len(dataframe)] = [k, v]
            dataframes[vertice] = dataframe
        else:
            # pp.pprint(probas[vertice])
            dataframe['Outcome'] = None

            vertexen = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                temp = []
                for parent_index, parent in enumerate(probas[vertice]["parents"]):
                    # print str([str(float(index_outcome))])
                    temp = probas[vertice]["cprob"]
                    dataframe[parent] = None
                vertexen[str(outcome)] = temp

            dataframe['Probability'] = None
            od = collections.OrderedDict(sorted(vertexen.items()))

            # [str(float(i)) for i in ast.literal_eval(key)]


            # str(v[key][int(float(k))-1])

            # print "Vertice: " + str(vertice) + " with parents: " + str(probas[vertice]['parents'])
            # print "Outcome" + "\t\t" + '\t\t'.join(probas[vertice]['parents']) + "\t\tProbability"
            # print "------------" * len(probas[vertice]['parents']) *3
            # pp.pprint(od.values())

            counter = 0
            # print number_of_cols
            for outcome, cprobs in od.iteritems():
                for key in cprobs.keys():
                    array_frame = []
                    array_frame.append((outcome))
                    print_string = str(outcome) + "\t\t"
                    for parent_value, parent in enumerate([i for i in ast.literal_eval(key)]):
                        # print "parent-value:"+str(parent_value)
                        # print "parten:"+str(parent)
                        array_frame.append(int(float(parent)))
                        # print "lengte array_frame: "+str(len(array_frame))
                        print_string += parent + "\t\t"
                    array_frame.append(cprobs[key][counter])
                    # print "lengte array_frame (2): "+str(len(array_frame))
                    # print  cprobs[key][counter]
                    print_string += str(cprobs[key][counter]) + "\t"
                    # for stront in [str(round(float(i), 3)) for i in ast.literal_eval(key)]:
                    #     print_string += stront + "\t\t"
                    # print "print string: " + print_string
                    # print "array_frame:" + str(array_frame)
                    dataframe.loc[len(dataframe)] = array_frame
                counter += 1
        print "Vertice " + str(vertice) + " done"
        dataframes[vertice] = dataframe

    for edge in edges:
        dot_string += edge[0].replace(" ", "_") + ' -> ' + edge[1].replace(" ", "_") + ';\n'

    dot_string += '}'
    src = Source(dot_string)
    if draw_network:
        src.render('../data/BN', view=True)
    print "visualization done"
    return dataframes
Example #38
def bn_learn(attr, cicli, passed_file):
    path_to_sentiments = 'sentiment_AFINN'

    print "Using AFINN sentiment dictionary"

    if attr == 0:
        print "Considering the number of tweets"
    elif attr == 1:
        print "Considering averaged number of positive, negative and neutral tweets"
    elif attr == 2:
        print "Considering averaged value of positive and negative tweets"
    elif attr == 3:
        print "Considering positive and negative tweets' increment"
    elif attr == 4:
        print "Considering bullishness index obtained by number of tweets sentiment"
    elif attr == 5:
        print "Considering bullishness index obtained by tweets value of sentiment"

    print "And considering market trend"

    all_data = []
    files = [
        path_to_sentiments + "/" + file
        for file in os.listdir(path_to_sentiments) if file.endswith('.json')
    ]
    for file in files:
        with open(file) as sentiment_file:
            data = json.load(sentiment_file)

            vdata = {}
            if attr == 0:
                vdata["com"] = data["n_tweets"]
            elif attr == 1:
                vdata["pos"] = data["n_pos_ave"]
                vdata["neg"] = data["n_neg_ave"]
                vdata["neu"] = data["n_neu_ave"]
            elif attr == 2:
                vdata["pos"] = data["pos_val_ave"]
                vdata["neg"] = data["neg_val_ave"]
            elif attr == 3:
                vdata["pos"] = data["pos_inc"]
                vdata["neg"] = data["neg_inc"]
            elif attr == 4:
                vdata["com"] = data["bull_ind"]
            elif attr == 5:
                vdata["com"] = data["bull_ind_val"]

            vdata["market"] = data["market_inc"]

            all_data.append(vdata)

    skel = GraphSkeleton()
    if len(all_data[0]) == 2:
        skel.load("network_struct_1_vertex.json")
        print "Loading structure with 2 nodes"
    elif len(all_data[0]) == 3:
        skel.load("network_struct_2_vertex.json")
        print "Loading structure with 3 nodes"
    elif len(all_data[0]) == 4:
        skel.load("network_struct_3_vertex.json")
        print "Loading structure with 4 nodes"
    skel.toporder()

    learner = PGMLearner()
    result = learner.lg_mle_estimateparams(skel, all_data)
    for key in result.Vdata.keys():
        result.Vdata[key]['type'] = 'lg'

    prob_pos = prob_neg = prob_neu = 0
    for data in all_data:
        if data['market'] == 1:
            prob_pos += 1
        elif data['market'] == 0:
            prob_neu += 1
        else:
            prob_neg += 1
    prob_pos = float(prob_pos) / float(len(all_data))
    prob_neg = float(prob_neg) / float(len(all_data))
    prob_neu = float(prob_neu) / float(len(all_data))

    tmp = {}
    tmp['numoutcomes'] = 3  # number of discrete outcomes: positive, negative, neutral
    tmp['cprob'] = [prob_pos, prob_neg, prob_neu]
    tmp['parents'] = result.Vdata['market']['parents']
    tmp['vals'] = ['positive', 'negative', 'neutral']
    tmp['type'] = 'discrete'
    tmp['children'] = result.Vdata['market']['children']
    result.Vdata['market'] = tmp

    node = Discrete(result.Vdata["market"])
    print "Loading node as Discrete"

    estimated, real = mcmc_json(passed_file, attr, cicli, node)

    return estimated, real
Example #39
for sample in featureVectorSamples:
  for vertex in vertices:
    if vertex not in sample.keys():
      sample[vertex] = vertexAverages[vertex]


# Testing just 4 vertices for now (takes a really, really long time to use all of them)
keysToRemove = list(vertices)[5:]
#keysToRemove.remove('HIV')

for sample in featureVectorSamples:
  for key in keysToRemove:
    del sample[key]

# instantiate learner 
learner = PGMLearner()

# Voila, it makes us a bayesian network!
result = learner.lg_estimatebn(featureVectorSamples, pvalparam = 0.10)

# output
print json.dumps(result.Vdata, indent=2)
print json.dumps(result.E, indent=2)

# For progress report: previous things we tried!

# Hackily removes all vertices with missing values, leaving just country name and year :P
# Instead, we should totally impute values using our linear classifier!
# commonVertices = vertices
# for sample in featureVectorSamples:
#   commonVertices2 = set([v for v in commonVertices])
Example #41
class PGMTrainer(Trainer):

    def __init__(self, module, dataset=None):

        Trainer.__init__(self, module)
        #self.setData(dataset)
        self.ds = dataset
        self.learner = PGMLearner()

    def train(self):
        """Train the associated module for one epoch."""
        assert len(self.ds) > 0, "Dataset cannot be empty."

        gbds = []
        ds2 = []
        for seq in self.ds:
            for state_, action_, reward_ in seq:

                #sample = dict(theta=state_[0],thetaV=state_[1],s=state_[2],sV=state_[3],Action=action_[0],Reward=reward_[0])
                sample = dict(StateA=state_[0],StateB=state_[2],StateC=state_[1],StateD=state_[3],Action=action_[0],Reward=reward_[0])

                 #print state_, action_, reward_
#                sample = dict(StateA=state_[0],StateB=state_[2],StateC=state_[1],StateD=state_[3],Action=action_[0],Reward=reward_[0])
                #sample = dict(theta=state_[0],thetaPrime=state_[2],s=state_[1],sPrime=state_[3],Action=action_[0],Reward=reward_[0])

                if sample["Reward"] >= 0:
                    gbds.append(sample)
                    
                if sample["Reward"] == -1:
                    ds2.append(sample)
                #print sample["Reward"]

        # sort samples for highest reward
#        bdss = sorted(gbds, key=lambda tup: tup["Reward"],reverse=True)
#        
        #print "BDS: "
        #print json.dumps(gbds, indent=2)
#        print "BDSS: "
#        print json.dumps(bdss, indent=2)
        
        #tokeep = bdss[:max(2,len(bdss)/2)]
        
        #print bds
        # estimate parameters
#        print "data size: ", len(bds),  len(gbds)
        N = 200
        if len(gbds) < N:
            l = N - len(gbds)
            n = len(ds2)
            
            t = len(ds2[n-l:])
            gbds.extend(ds2[n-l:])
            
        print "ds:, ", len(gbds), len(ds2)
        
        
        if len(gbds) < 100:
#            print "burn"
            self.module.burn = True
            return
        else:
            self.module.burn = False
            
        
        if len(gbds) < 5: #there was no rewarding action, so nothing to learn
          self.module.burn = True
          return
          
        N = 200
        if len(gbds) > N:
            #only take the newest N samples

            l = len(gbds)
            gbds = gbds[l-N:]
#            print "new effective set", len(gbds)
        
        skel = GraphSkeleton()
        #load network topology
        skel.load("net2.txt")
#        skel.load("workfile")
        skel.toporder()


        # estimate parameters
        self.module.net = self.learner.lg_mle_estimateparams(skel, gbds)
Example #43
    def learn(self):
        print "ds: ", len(self.dataset)
        #print self.dataset
        
        data = []
        
        rw = []
        
        
        bestreward = -100
        for seq in self.dataset:
            for state_, action_, reward_ in seq:
                if reward_[0] > bestreward:
                    bestreward = reward_[0]
                
                # find limit for theta
                 
        print "bestrw", bestreward
        nds = []
        lt=[]
        ls = []
        ltv =[]
        lsv=[]
        
        i = 0
        for seq in self.dataset:
            for state_, action_, reward_ in seq:
#                if reward_[0] == 0:
#                    print state_, action_, reward_
                #print state_, reward_
                if reward_[0] == bestreward:
                    ns = (state_, action_[0], reward_[0])
                    nds.append(ns)
#                    print state_[0], state_[2], reward_[0], bestreward
                    
                    t = state_[0]
                    tv= state_[1]
                    
                    s = state_[2] 
                    sv = state_[3]    
                    if t > 0.05:
                        print "hmmm,", i, t
                        #raise Exception(i)
                        
                    i += 1
                    lt.append(t)
                    ls.append(s)
                    ltv.append(tv)
                    lsv.append(sv)
        
        
        limits = dict(theta=[min(lt),max(lt)],s=[min(ls),max(ls)],thetaV=[min(ltv),max(ltv)],sV=[min(lsv),max(lsv)])
        
        print "limits: ", limits
                    
#        print "all good things:", nds
                    
                
                
        #convert ds
        for seq in self.dataset:
            for state_, action_, reward_ in seq:
                
#                sample = dict(theta=state_[0],thetaPrime=state_[1],s=state_[2],sPrime=state_[3],Action=action_[0],Reward=reward_[0])
#
#                
#                dtpo = min( abs(sample["thetaPrime"] - limits["theta"][0]), abs(sample["thetaPrime"] - limits["theta"][1]))
#                dto = min( abs(sample["theta"] - limits["theta"][0]), abs(sample["theta"] - limits["theta"][1]))
#                dspo = min( abs(sample["sPrime"] - limits["s"][0]), abs(sample["sPrime"] - limits["s"][1]))
#                dso = min( abs(sample["s"] - limits["s"][0]), abs(sample["s"] - limits["s"][1]))
#                             
#               #print dspo, dso
#                
#                netsample = dict(theta=sample["theta"],s=sample["s"],Action=sample["Action"],Reward=sample["Reward"])
#                # did this action improve theta or s??
#                if dtpo <= dto or dspo <= dso: #yes it did            
##                    data.append(netsample)
#                    rw.append(sample["Reward"])
                sample = dict(theta=state_[0],thetaV=state_[1],s=state_[2],sV=state_[3],Action=action_[0],Reward=reward_[0])

                #print state_, action_, reward_
                #print sample
                if sample["Reward"] != 990:
                    data.append(sample)
                    if numpy.random.random() >= 9.1:
                        continue
                
                
          

        import matplotlib.pyplot as plt
        import pandas as pd
        df = pd.DataFrame(rw)
#        print df        
        
#        plt.figure()
#        df[0].diff().hist()
        
        # instantiate my learner 
        learner = PGMLearner()
        
        # estimate parameters
        rbn = []
        for i in range(0,1):
            result = learner.lg_constraint_estimatestruct(data,bins=10, pvalparam=0.05)
            rbn.append(result)
            print len(result.E), result.E
            
        result = rbn[0]
        
        # output - toggle comment to see
       

        print json.dumps(result.V, indent=2)
        print len(result.E), "Edges", result.E
        
        import pydot

        # this time, in graph_type we specify we want a DIrected GRAPH
        graph = pydot.Dot(graph_type='digraph')
        nd = {}
        for n in result.V:
            nd[n] = pydot.Node(n)
            graph.add_node(nd[n])
            
        for e in result.E:
            
            graph.add_edge(pydot.Edge(nd[e[0]], nd[e[1]]))
            
        graph.write_png('eg.png')
        from IPython.display import Image
        Image('eg.png')
        
        
        f = open('workfile', 'w')
        f.write("{\n \"V\":")
        f.write(json.dumps(result.V))
        f.write(",\n \"E\":")
        f.write(json.dumps(result.E))
        f.write("}")
        f.close()
        
        skel = GraphSkeleton()
        skel.load("workfile")
        
        # topologically order graphskeleton
        skel.toporder()
        

        return
Example #44
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("bayes_structure.txt")  # an input file
skel = GraphSkeleton()
skel.load("bayes_structure.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
data = bn.randomsample(200)

# instantiate my learner
learner = PGMLearner()

# estimate parameters from data and skeleton
result = learner.discrete_mle_estimateparams(skel, data)

# output
print json.dumps(result.Vdata, indent=2)
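A natural follow-up (a sketch; TableCPDFactorization is used the same way in Examples #48 and #51, but the node and value names here are assumptions for bayes_structure.txt):

# sketch: query the re-learned network
from libpgm.tablecpdfactorization import TableCPDFactorization
tcf = TableCPDFactorization(result)
print tcf.specificquery(query=dict(Letter=['weak']), evidence=dict(Grade='A'))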
Example #46
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("grades.txt")  # an input file
skel = GraphSkeleton()
skel.load("grades.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
data = bn.randomsample(80000)

# instantiate my learner
learner = PGMLearner()

# estimate structure
result = learner.discrete_constraint_estimatestruct(data)

# output
print json.dumps(result.E, indent=2)
Example #47
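# (truncated in the source: only the tail of a format_data helper survives below)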
    return result


#load all preprocessed training data
df = pd.read_csv('features.csv', sep=',')

#format data to let them correctly processed by libpgm functions
node_data = format_data(df)

skel = GraphSkeleton()
#load structure of our net
skel.load("./skel-learned2.txt")
#setting the topological order
skel.toporder()
#learner which will estimate parameters and, if needed, the net structure
learner = PGMLearner()

#estimating parameters for our own model
res = learner.discrete_mle_estimateparams(skel, node_data)
"""
#estimating the net structure given training data and parameters; this is an alternative way to create a new model from our data
net = learner.discrete_estimatebn(node_data)

with open("reteTestMeta.csv", "a") as gv:
  gv.write(json.dumps(net.V, indent=2))
  gv.write(json.dumps(net.E, indent=2))  
res = learner.discrete_mle_estimateparams(net, node_data)
with open("modelloMeta.csv", "a") as gv:
  gv.write(json.dumps(res.E, indent=2))
  gv.write(json.dumps(res.Vdata, indent=2))  
"""
Example #48
import json
import string

from libpgm.graphskeleton import GraphSkeleton
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)

listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf=TableCPDFactorization(result)

#Rating 1 Given Genre  is Drama
myquery = dict(rating=[1])
myevidence = dict(genre='Drama')
result=tcf.specificquery(query=myquery,evidence=myevidence)
print result


tcf.refresh()

#Rating 2 Given Genre  is Drama
Example #50
__author__ = 'Amir'

import json

from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

with open('data.txt', 'r') as f:
    data = eval(f.read())  # parses a Python literal; ast.literal_eval would be safer

# load the graph skeleton
skel = GraphSkeleton()
skel.load("skel.txt")
skel.toporder()

# instantiate my learner
learner = PGMLearner()

# estimate parameters from data and skeleton
result = learner.lg_mle_estimateparams(skel, data)

# output
print json.dumps(result.Vdata, indent=2)
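The learned linear-Gaussian parameters can then be read per node (a sketch; 'X' is a hypothetical node name, and mean_base/variance are the keys the unit tests on this page check):

# sketch: read one node's learned parameters
vd = result.Vdata['X']
print vd['mean_base']    # intercept of the conditional mean
print vd['variance']     # conditional variance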
Example #51
import json
import string

from libpgm.graphskeleton import GraphSkeleton
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)

#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

#Rating 1 Given Occupation is student
myquery = dict(rating=[1])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result

tcf.refresh()

#Rating 2 Given Occupation is student
myquery = dict(rating=[2])
Example #52
def learnDiscreteBN_with_structure(df,
                                   continous_columns,
                                   features_column_names,
                                   label_column='cat',
                                   draw_network=False):
    features_df = df.copy()
    features_df = features_df.drop(label_column, axis=1)

    labels_df = DataFrame()
    labels_df[label_column] = df[label_column].copy()

    for i in continous_columns:
        bins = np.arange((min(features_df[i])), (max(features_df[i])),
                         ((max(features_df[i]) - min(features_df[i])) / 5.0))
        features_df[i] = pandas.np.digitize(features_df[i], bins=bins)

    data = []
    for index, row in features_df.iterrows():
        dict = {}
        for i in features_column_names:
            dict[i] = row[i]
        dict[label_column] = labels_df[label_column][index]
        data.append(dict)

    print "Init done"
    learner = PGMLearner()

    graph = GraphSkeleton()

    graph.V = []
    graph.E = []

    graph.V.append(label_column)

    for vertice in features_column_names:
        graph.V.append(vertice)
        graph.E.append([vertice, label_column])

    test = learner.discrete_mle_estimateparams(graphskeleton=graph, data=data)

    print "done learning"

    edges = test.E
    vertices = test.V
    probas = test.Vdata

    # print probas

    dot_string = 'digraph BN{\n'
    dot_string += 'node[fontname="Arial"];\n'

    dataframes = {}

    print "save data"
    for vertice in vertices:
        print "New vertice: " + str(vertice)
        dataframe = DataFrame()

        pp = pprint.PrettyPrinter(indent=4)
        # pp.pprint(probas[vertice])
        dot_string += vertice.replace(
            " ", "_") + ' [label="' + vertice + '\n' + '" ]; \n'

        if len(probas[vertice]['parents']) == 0:
            dataframe['Outcome'] = None
            dataframe['Probability'] = None
            vertex_dict = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                vertex_dict[str(
                    outcome)] = probas[vertice]["cprob"][index_outcome]

            od = collections.OrderedDict(sorted(vertex_dict.items()))
            # print "Vertice: " + str(vertice)
            # print "%-7s|%-11s" % ("Outcome", "Probability")
            # print "-------------------"
            for k, v in od.iteritems():
                # print "%-7s|%-11s" % (str(k), str(round(v, 3)))
                dataframe.loc[len(dataframe)] = [k, v]
            dataframes[vertice] = dataframe
        else:
            # pp.pprint(probas[vertice])
            dataframe['Outcome'] = None

            vertexen = {}
            for index_outcome, outcome in enumerate(probas[vertice]['vals']):
                temp = []
                for parent_index, parent in enumerate(
                        probas[vertice]["parents"]):
                    # print str([str(float(index_outcome))])
                    temp = probas[vertice]["cprob"]
                    dataframe[parent] = None
                vertexen[str(outcome)] = temp

            dataframe['Probability'] = None
            od = collections.OrderedDict(sorted(vertexen.items()))

            # [str(float(i)) for i in ast.literal_eval(key)]

            # str(v[key][int(float(k))-1])

            # print "Vertice: " + str(vertice) + " with parents: " + str(probas[vertice]['parents'])
            # print "Outcome" + "\t\t" + '\t\t'.join(probas[vertice]['parents']) + "\t\tProbability"
            # print "------------" * len(probas[vertice]['parents']) *3
            # pp.pprint(od.values())

            counter = 0
            # print number_of_cols
            for outcome, cprobs in od.iteritems():
                for key in cprobs.keys():
                    array_frame = []
                    array_frame.append((outcome))
                    print_string = str(outcome) + "\t\t"
                    for parent_value, parent in enumerate(
                        [i for i in ast.literal_eval(key)]):
                        # print "parent-value:"+str(parent_value)
                        # print "parten:"+str(parent)
                        array_frame.append(int(float(parent)))
                        # print "lengte array_frame: "+str(len(array_frame))
                        print_string += parent + "\t\t"
                    array_frame.append(cprobs[key][counter])
                    # print "lengte array_frame (2): "+str(len(array_frame))
                    # print  cprobs[key][counter]
                    print_string += str(cprobs[key][counter]) + "\t"
                    # for stront in [str(round(float(i), 3)) for i in ast.literal_eval(key)]:
                    #     print_string += stront + "\t\t"
                    # print "print string: " + print_string
                    # print "array_frame:" + str(array_frame)
                    dataframe.loc[len(dataframe)] = array_frame
                counter += 1
        print "Vertice " + str(vertice) + " done"
        dataframes[vertice] = dataframe

    for edge in edges:
        dot_string += edge[0].replace(" ", "_") + ' -> ' + edge[1].replace(
            " ", "_") + ';\n'

    dot_string += '}'
    # src = Source(dot_string)
    # src.render('../data/BN', view=draw_network)
    # src.render('../data/BN', view=False)
    print "vizualisation done"
    return dataframes
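A possible invocation (a sketch; the DataFrame and column names are assumptions). The function wires every feature in as a parent of the label node, so the skeleton is a fixed star-shaped graph:

# sketch: learn CPDs under the fixed star-shaped structure
frames = learnDiscreteBN_with_structure(df,
                                        continous_columns=['age'],
                                        features_column_names=['age', 'sex'],
                                        label_column='cat')
print frames['cat']      # CPD table of the label node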