def test_bde2(self):
    """Compare BDeu scores computed from SQL counts with reference values.

    For every CPT in ``self.cpts`` the counts are fetched with
    ``get_counts_sql`` from the data in ``myksl.dat`` and scored with
    ``bdeu_score(64)``; each score and their sum are checked against the
    hard-coded reference numbers below.
    """
    # NOTE(review): the handle returned by open() is never closed explicitly.
    dataset = Data(read_csv(open('myksl.dat')))

    # Reference session the expected numbers were copied from:
    # > ksbln.prior <- jointprior(kslbn)
    # Imaginary sample size: 64
    # > kslbn.fit <- getnetwork(learn(kslbn,myksl,ksbln.prior))
    # deal scores
    # > score(nodes(kslbn.fit)$Smok)
    # [1] -637.9544
    # > score(nodes(kslbn.fit)$Alc)
    # [1] -752.027
    # > score(nodes(kslbn.fit)$Work)
    # [1] -463.1716
    # > score(nodes(kslbn.fit)$Sex)
    # [1] -751.0772
    # > score(nodes(kslbn.fit)$Year)
    # [1] -666.6585
    # > score(kslbn.fit)
    # [1] -3270.889
    expected_by_node = (-637.9544, -752.027, -463.1716, -751.0772, -666.6585)
    expected_total = -3270.889

    # NOTE(review): `places` is not defined in this method; presumably a
    # module-level constant -- confirm.
    running_total = 0
    for position, family in enumerate(self.cpts):
        counted = family.get_counts_sql(dataset)
        node_score = counted.bdeu_score(64)
        self.assertAlmostEqual(node_score, expected_by_node[position], places)
        running_total = running_total + node_score
    self.assertAlmostEqual(running_total, expected_total, places)
def test_bde3(self):
    """Check ``Data.family_score`` against the reference scores.

    Each CPT in ``self.cpts`` supplies a child and its parents (all other
    variables of the CPT); the family is scored directly on the data with
    an imaginary sample size of 64.0.
    """
    # just tests that myskl has the right counts
    myskl = Data(read_csv(open('myksl.dat')))

    # > ksbln.prior <- jointprior(kslbn)
    # Imaginary sample size: 64
    # > kslbn.fit <- getnetwork(learn(kslbn,myksl,ksbln.prior))
    # deal scores
    # > score(nodes(kslbn.fit)$Smok)
    # [1] -637.9544
    # > score(nodes(kslbn.fit)$Alc)
    # [1] -752.027
    # > score(nodes(kslbn.fit)$Work)
    # [1] -463.1716
    # > score(nodes(kslbn.fit)$Sex)
    # [1] -751.0772
    # > score(nodes(kslbn.fit)$Year)
    # [1] -666.6585
    # > score(kslbn.fit)
    # [1] -3270.889
    reference_nodes = (-637.9544, -752.027, -463.1716, -751.0772, -666.6585)
    reference_net = -3270.889

    # NOTE(review): `places` is not defined in this method; presumably a
    # module-level constant -- confirm.
    family_scores = []
    for index, cpt in enumerate(self.cpts):
        child = cpt.child()
        non_child = cpt.variables() - set([child])
        value = myskl.family_score(child, list(non_child), 64.0)
        self.assertAlmostEqual(value, reference_nodes[index], places)
        family_scores.append(value)
    # sum() adds left to right starting from 0, like the original accumulator.
    self.assertAlmostEqual(sum(family_scores), reference_net, places)
def runTest(self):
    """Check ``shd`` distances between the learned pattern and stored PDAGs.

    A pattern is built with ``ICPattern`` from a ``PCCI`` object over a
    ``G2Separator`` on the data in ``tetrad_asia.csv`` and compared, via
    ``shd``, with ``self._asia_pdag`` and ``self._tetrad_pdag``.
    """
    # NOTE(review): the handle returned by open() is never closed explicitly.
    data = CompactFactor(read_csv(open('tetrad_asia.csv')), domain=Domain())
    ci = PCCI(G2Separator(data))
    g = ICPattern(ci)
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertEqual(g.shd(self._asia_pdag), 5)
    self.assertEqual(self._tetrad_pdag.shd(self._asia_pdag), 4)
    # I think tetrad is wrong (in terms of implementation)
    self.assertEqual(g.shd(self._tetrad_pdag), 1)
def test_smalldata_sql(self):
    """Check factors built from ``fake_data`` against hand-written counts."""
    dat = Data(read_csv(open('fake_data')))

    # The factor over no variables must have z() equal to 5.
    self.assertEqual(dat.makeFactor([]).z(), 5)

    # (variables, expected counts), checked in this order.
    expectations = (
        (('bar',), (2, 2, 1)),
        (('blah',), (3, 1, 1)),
        (('foo',), (3, 1, 1, 0)),
        (('bar', 'foo'), (2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0)),
    )
    for names, counts in expectations:
        # Fresh lists on every call, as in the literal-based original.
        built = dat.makeFactor(list(names))
        wanted = Factor(list(names), list(counts))
        self.samefactor(built, wanted)
def test_smalldata_sql(self):
    """Verify ``Data.makeFactor`` on the small ``fake_data`` file.

    The empty factor must have ``z()`` equal to 5, and the factors for
    'bar', 'blah', 'foo' and ('bar', 'foo') must match the listed counts.
    """
    source = open('fake_data')
    rawdata = read_csv(source)
    dat = Data(rawdata)

    empty_factor = dat.makeFactor([])
    self.assertEqual(empty_factor.z(), 5)

    bar_factor = dat.makeFactor(['bar'])
    self.samefactor(bar_factor, Factor(['bar'], [2, 2, 1]))

    blah_factor = dat.makeFactor(['blah'])
    self.samefactor(blah_factor, Factor(['blah'], [3, 1, 1]))

    foo_factor = dat.makeFactor(['foo'])
    self.samefactor(foo_factor, Factor(['foo'], [3, 1, 1, 0]))

    joint_factor = dat.makeFactor(['bar', 'foo'])
    joint_counts = [2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]
    self.samefactor(joint_factor, Factor(['bar', 'foo'], joint_counts))
def runTest(self):
    """Check the independences found by ``PCCI`` on ``tetrad_xor.csv``.

    For every pair from ``pairs(data.variables())``: X1 and X2 must be
    reported independent with no independence involving X3, and every
    other pair must not be reported independent. The whole check is run
    twice on freshly loaded data, as in the original test; only the first
    pass prints ``ci._ind``.
    """
    def check_once(show_found):
        # One complete pass: load data, build PCCI, check every pair.
        data = CompactFactor(read_csv(open('tetrad_xor.csv')), domain=Domain())
        ci = PCCI(G2Separator(data))
        if show_found:
            print(ci._ind)
        for a, b in pairs(data.variables()):
            if (a, b) in (('X1', 'X2'), ('X2', 'X1')):
                self.assertTrue(ci.has_independence(a, b))
                self.assertFalse(ci.has_independence_involving(a, b, 'X3'))
            else:
                # Single-argument print() gives the same output on
                # Python 2 and Python 3.
                print('%s %s' % (a, b))
                self.assertFalse(ci.has_independence(a, b))

    check_once(True)
    check_once(False)
def test_ipf2(self):
    """Run ``ipf`` on a cycle of pairwise marginals from ``alarm_1K.dat``."""
    # just tests termination
    alarm = CompactFactor(read_csv(open('alarm_1K.dat')))

    # Close the chain of variables into a cycle by repeating the first one.
    cycle = list(alarm.variables())
    cycle.append(cycle[0])

    marginals = {}
    model = RFR()
    for first, second in zip(cycle, cycle[1:]):
        hyperedge = frozenset([first, second])
        model *= Factor(hyperedge)
        marginals[hyperedge] = alarm[hyperedge].normalised()

    model.ipf(marginals, 0.001)
def test_ipf2(self):
    """Fit an ``RFR`` model to adjacent-pair marginals of the alarm data."""
    # just tests termination
    raw = read_csv(open('alarm_1K.dat'))
    alarm = CompactFactor(raw)

    names = list(alarm.variables())
    # Repeat the first variable at the end so the last pair wraps around.
    names.append(names[0])

    model = RFR()
    marginals = {}
    for position, name in enumerate(names[:-1]):
        edge = frozenset((name, names[position + 1]))
        model *= Factor(edge)
        target = alarm[edge]
        marginals[edge] = target.normalised()

    tolerance = 0.001
    model.ipf(marginals, tolerance)
def setUp(self):
    """Load the Asia network, reference marginals and the alarm raw data.

    Sets ``self.domain``, ``self.bnm`` (read from ``Asia.dnet``),
    ``self.marginals``, ``self.cond_marginals``, ``self.cptdict`` (child
    variable -> CPT) and ``self.rawdata`` (from ``alarm_1K.dat``).
    """
    from gPy.Variables import Domain
    self.domain = Domain()
    self.bnm = BN(domain=self.domain)
    self.bnm.from_dnet(read_dnet('Asia.dnet'))
    self.cptdict = {}

    # taken directly from Netica output
    unconditional = (
        ('VisitAsia', [0.99, 0.01]),
        ('Tuberculosis', [0.9896, 0.0104]),
        ('Smoking', [0.5, 0.5]),
        ('Cancer', [0.945, 0.055]),
        ('TbOrCa', [0.93517, 0.064828]),
        ('XRay', [0.11029, 0.88971]),
        ('Bronchitis', [0.55, 0.45]),
        ('Dyspnea', [0.56403, 0.43597]),
    )
    self.marginals = [Factor((name,), probs) for name, probs in unconditional]

    # taken directly from Netica output
    conditional = (
        ('VisitAsia', [0.95192, 0.048077]),
        ('Tuberculosis', [0, 1]),
        ('Smoking', [0.52381, 0.47619]),
        #other marginals are conditional on these values
        #('Cancer', [1,0]),
        #('TbOrCa', [0,1]),
        ('XRay', [0.98, 0.02]),
        ('Bronchitis', [0.55714, 0.44286]),
        ('Dyspnea', [0.21143, 0.78857]),
    )
    self.cond_marginals = [Factor((name,), probs) for name, probs in conditional]

    # Index every CPT of the network by its child variable.
    self.cptdict.update((cpt.child(), cpt) for cpt in self.bnm)

    self.rawdata = read_csv(open('alarm_1K.dat'))
def setUp(self):
    """Prepare the Asia Bayesian network fixture.

    Reads ``Asia.dnet`` into ``self.bnm``, stores the expected marginal
    factors, maps each child variable to its CPT in ``self.cptdict`` and
    reads ``alarm_1K.dat`` into ``self.rawdata``.
    """
    from gPy.Variables import Domain

    def single(variable, *values):
        # Factor over exactly one variable with the given values.
        return Factor((variable,), list(values))

    self.domain = Domain()
    self.bnm = BN(domain=self.domain)
    self.bnm.from_dnet(read_dnet('Asia.dnet'))
    self.cptdict = {}
    # taken directly from Netica output
    self.marginals = [
        single('VisitAsia', 0.99, 0.01),
        single('Tuberculosis', 0.9896, 0.0104),
        single('Smoking', 0.5, 0.5),
        single('Cancer', 0.945, 0.055),
        single('TbOrCa', 0.93517, 0.064828),
        single('XRay', 0.11029, 0.88971),
        single('Bronchitis', 0.55, 0.45),
        single('Dyspnea', 0.56403, 0.43597),
    ]
    # taken directly from Netica output
    self.cond_marginals = [
        single('VisitAsia', 0.95192, 0.048077),
        single('Tuberculosis', 0, 1),
        single('Smoking', 0.52381, 0.47619),
        #other marginals are conditional on these values
        #single('Cancer', 1, 0),
        #single('TbOrCa', 0, 1),
        single('XRay', 0.98, 0.02),
        single('Bronchitis', 0.55714, 0.44286),
        single('Dyspnea', 0.21143, 0.78857),
    ]
    for table in self.bnm:
        key = table.child()
        self.cptdict[key] = table
    self.rawdata = read_csv(open('alarm_1K.dat'))
def setUp(self):
    """Read ``alarm_1K.dat`` with ``read_csv`` and keep it as ``self.rawdata``."""
    # NOTE(review): the handle returned by open() is never closed explicitly.
    source = open('alarm_1K.dat')
    parsed = read_csv(source)
    self.rawdata = parsed
# Demo: load florida.dat, extract the Murderer/Sentence/Victim table and
# print it raw, with its z() value, and normalised.
from gPy.IO import read_csv
from gPy.Parameters import CompactFactor
import gPy.Parameters

# NOTE(review): the handle returned by open() is never closed explicitly.
florida = CompactFactor(read_csv(open('florida.dat')))

#create a normal factor
table = florida['Murderer', 'Sentence', 'Victim']

# Single-argument print() calls give the same output on Python 2 and run
# on Python 3, where the print statement is a SyntaxError.
print(table)
print('Number of observations is %d' % table.z())

# Set the module-level precision before printing the normalised table.
gPy.Parameters.precision = 6
print(table.normalised())
# Scratch script exercising Data2 on the asia_100 data set.
from gPy.Data import Data2
from gPy.IO import read_csv
import sys, gzip

data = Data2(read_csv(
    open('/home/jc/godot/research/icml08/data/asia_100.data')),
             rmin=3)
#for k, v in data._data.items():
#    print k, v
#print data.marginal(['VisitAsia'])
#print data.marginal(['VisitAsia','TbOrCa','XRay','Dyspnea'])
#data = Data2(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz')),rmin=5)
#for k, v in data._data.items():
#    print k, v
#sys.exit()
data._test(['VisitAsia', 'TbOrCa', 'XRay', 'Dyspnea'])
#data._test(['Accident','ILiCost'])
sys.exit()

# NOTE(review): everything below is unreachable because of the sys.exit()
# above; it is kept so the exit can be commented out for experiments.
# The print calls are written so they give the same output on Python 2 and
# also parse on Python 3.
print(data.marginal(['Accident', 'ILiCost']))
vs = sorted(data._variables)
for v in vs:
    for w in vs:
        for z in vs:
            print('%s %s %s %s' % (v, w, z,
                                   data.marginal(frozenset([v, w, z]))))
# Demo: open the marginalisation GUI on the normalised
# Smoker/Cancer/Bronchitis table taken from cancer.dat.
from gPy.IO import read_csv
from gPy.Parameters import CompactFactor
from gPy.Demos import marginalise_gui

# NOTE(review): the handle returned by open() is never closed explicitly.
_records = read_csv(open('cancer.dat'))
cancer = CompactFactor(_records)

#create a normal factor
_wanted = ('Smoker', 'Cancer', 'Bronchitis')
data = cancer[_wanted]

_distribution = data.normalised()
marginalise_gui(_distribution)
# Script: print BDeu scores for the asia example on the data file named by
# the first command-line argument, before and after removing the arrow
# Smoking -> Cancer.
from gPy.Examples import asia
from gPy.Parameters import CompactFactor
from gPy.IO import read_csv
import sys

# NOTE(review): the handle returned by open() is never closed explicitly.
data = CompactFactor(read_csv(open(sys.argv[1])))

# Single-argument print() calls give the same output on Python 2 and run
# on Python 3, where the print statement is a SyntaxError.
print(asia.bdeu_score(data))


def score_adg(adg, data):
    """Print, for each vertex of `adg`, its family's BDeu score on `data`.

    For every child the factor over the child and its parents is built
    from `data`, turned into a CPT with ``makeCPT(child, False)`` and
    scored with ``bdeu_score()``. Output is bracketed by '^^^^' and
    'vvvvv' marker lines and followed by an empty line.
    """
    print('^^^^')
    for child in adg.vertices():
        parents = adg.parents(child)
        family = parents | set([child])
        data_cpt = data.makeFactor(family).makeCPT(child, False)
        print('%s %s' % (child, data_cpt.bdeu_score()))
    print('vvvvv')
    print('')


adg = asia.adg()
score_adg(adg, data)
adg.remove_arrow('Smoking', 'Cancer')
score_adg(adg, data)
# Script: print BDeu scores for the asia example on the data file named by
# the first command-line argument.
from gPy.Examples import asia
from gPy.Data import CompactFactor
from gPy.IO import read_csv
import sys

# NOTE(review): the handle returned by open() is never closed explicitly.
data = CompactFactor(read_csv(open(sys.argv[1])))

# print() with a single argument behaves the same on Python 2 and is valid
# on Python 3, unlike the original print statements.
print(asia.bdeu_score(data))


def score_adg(adg, data):
    """Print the BDeu score of every family of `adg` computed from `data`.

    Each line shows a vertex and the ``bdeu_score()`` of the CPT obtained
    from ``data.makeFactor(family).makeCPT(child, False)``, where the
    family is the vertex together with its parents. The listing is
    bracketed by '^^^^' and 'vvvvv' and followed by an empty line.
    """
    print('^^^^')
    for child in adg.vertices():
        family = adg.parents(child) | set([child])
        data_cpt = data.makeFactor(family).makeCPT(child, False)
        print('%s %s' % (child, data_cpt.bdeu_score()))
    print('vvvvv')
    print('')


adg = asia.adg()
score_adg(adg, data)
"""Throwaway script to test BIC score search """ #from gPy.Data import Data from gPy.Data import CompactFactor import sys, gzip from gPy.IO import read_csv data = CompactFactor(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz'))) for v in data.variables(): print v print print data.bic_search(v)
# Scratch script: run Data2._test on four asia variables, then exit.
from gPy.Data import Data2
from gPy.IO import read_csv
import sys, gzip

# NOTE(review): hard-coded absolute path; the handle is never closed
# explicitly.
data = Data2(read_csv(open("/home/jc/godot/research/icml08/data/asia_100.data")), rmin=3)

# for k, v in data._data.items():
#     print k, v
# print data.marginal(['VisitAsia'])
# print data.marginal(['VisitAsia','TbOrCa','XRay','Dyspnea'])
# data = Data2(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz')),rmin=5)
# for k, v in data._data.items():
#     print k, v
# sys.exit()

data._test(["VisitAsia", "TbOrCa", "XRay", "Dyspnea"])
# data._test(['Accident','ILiCost'])
sys.exit()

# NOTE(review): the statements below never run because of the sys.exit()
# call above. They are kept for experiments, with print calls that produce
# the same output on Python 2 and also parse on Python 3.
print(data.marginal(["Accident", "ILiCost"]))
vs = sorted(data._variables)
for v in vs:
    for w in vs:
        for z in vs:
            triple = data.marginal(frozenset([v, w, z]))
            print("%s %s %s %s" % (v, w, z, triple))