Example #1
File: pgm.py  Project: anhpt204/anomaly
import glob
from os.path import join

from libpgm.pgmlearner import PGMLearner


def anomaly_libpgm():
    # look at only the first data file for now
    files = glob.glob(join('data', '*.txt'))
    for fname in files[0:1]:
        print(fname)
        # read_data_libpgm is defined elsewhere in this project; it returns
        # the list of {variable: value} sample dicts that libpgm expects
        data = read_data_libpgm(fname)
        learner = PGMLearner()

        # learn structure and parameters of a linear Gaussian Bayesian network
        result = learner.lg_estimatebn(data, indegree=3)

        print(result.E)
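read_data_libpgm is not shown in this snippet. A minimal sketch of what such a loader might look like, assuming one whitespace-separated numeric sample per line with a header row of variable names (the actual format in the anhpt204/anomaly project may differ):

def read_data_libpgm(fname):
    # hypothetical loader: the first line holds variable names, each
    # following line one numeric sample; lg_estimatebn expects a list
    # of {variable: float} dicts
    with open(fname) as f:
        names = f.readline().split()
        data = []
        for line in f:
            values = [float(v) for v in line.split()]
            data.append(dict(zip(names, values)))
    return data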
Example #2
import unittest

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.pgmlearner import PGMLearner


class TestPGMLearner(unittest.TestCase):

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(0.9 < result.Vdata['SAT']['cprob']["['low']"][indexa] < 1)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(0.05 < result.Vdata['Letter']['cprob']["['A']"][indexb] < 0.15)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(5 < result.Vdata['SAT']['mean_base'] < 15)
        self.assertTrue(5 < result.Vdata['Letter']['variance'] < 15)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        self.assertEqual(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
Example #3
      sample[vertex] = vertexAverages[vertex]


# Testing just the first five vertices for now (takes a really, really long time to use all of them)
keysToRemove = list(vertices)[5:]
#keysToRemove.remove('HIV')

for sample in featureVectorSamples:
  for key in keysToRemove:
    del sample[key]

# instantiate learner 
learner = PGMLearner()

# Voila, it makes us a Bayesian network!
result = learner.lg_estimatebn(featureVectorSamples, pvalparam=0.10)

# output
print(json.dumps(result.Vdata, indent=2))
print(json.dumps(result.E, indent=2))

# For progress report: previous things we tried!

# Hackily removes all vertices with missing values, leaving just country name and year :P
# Instead, we should totally impute values using our linear classifier!
# commonVertices = vertices
# for sample in featureVectorSamples:
#   commonVertices2 = set([v for v in commonVertices])
#   for v in commonVertices:
#     if v not in sample.keys():
#       commonVertices2.remove(v)
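As a first step toward the imputation idea in the comment above, a minimal sketch that mean-fills missing vertices; vertexAverages is assumed to hold the per-vertex averages computed earlier in this script, and the project's linear classifier would replace the simple average:

def impute_missing(samples, vertices, vertexAverages):
  # hypothetical helper: fill any vertex absent from a sample with its
  # dataset-wide average so every sample has a value for every vertex
  for sample in samples:
    for vertex in vertices:
      if vertex not in sample:
        sample[vertex] = vertexAverages[vertex]
  return samples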
Example #4
#     if k in vertices:
#       newSample[k] = sample[k]
#   condensed_feature_vectors.append(newSample)
################################################

# import pprint
# pp = pprint.PrettyPrinter(indent=4)
# pp.pprint(condensed_feature_vectors)

# instantiate learner 
learner = PGMLearner()

# Voila, it makes us a bayesian network!
bayesian_networks_by_region = {}
for region in condensed_feature_vectors_by_region:
  bayesian_networks_by_region[region] = learner.lg_estimatebn(condensed_feature_vectors_by_region[region])
  print(region)
  print(json.dumps(bayesian_networks_by_region[region].Vdata, indent=2))
  print(json.dumps(bayesian_networks_by_region[region].E, indent=2))

# Evaluation:
predictions = []
test_arrs_by_region = {}
hiv_test_arrs_by_region = {}
for i, sample in enumerate(test_arr):
  region = getRegion(sample['Country'])
  # skip samples whose country has no region mapping
  if not region:
    continue
  if region not in test_arrs_by_region:   
    test_arrs_by_region[region] = []
    hiv_test_arrs_by_region[region] = []
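getRegion is defined elsewhere in this project. A hypothetical stand-in backed by a hand-written lookup table, just to make the loop above runnable; the real mapping presumably comes from the project's dataset:

REGION_BY_COUNTRY = {
  # hypothetical mapping; the actual project derives this from its data
  'Kenya': 'Sub-Saharan Africa',
  'France': 'Western Europe',
}

def getRegion(country):
  # returns None for countries with no known region
  return REGION_BY_COUNTRY.get(country)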
Example #5
File: examples.py  Project: Anaphory/libpgm
result = learner.lg_constraint_estimatestruct(data)

# output - toggle comment to see
# print(json.dumps(result.E, indent=2))

# (12) -----------------------------------------------------------------------
# Learn entire Bayesian networks

# say I have some data (lgbn is the linear Gaussian network built earlier in examples.py)
data = lgbn.randomsample(8000)

# instantiate my learner 
learner = PGMLearner()

# learn structure and estimate parameters
result = learner.lg_estimatebn(data)

# output - toggle comment to see
# print(json.dumps(result.E, indent=2))
# print(json.dumps(result.Vdata, indent=2))

# say I have some data (bn is the discrete network built earlier in examples.py)
data = bn.randomsample(2000)

# instantiate my learner 
learner = PGMLearner()

# learn structure and estimate parameters
result = learner.discrete_estimatebn(data)

# output - toggle comment to see
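Once discrete_estimatebn has returned a network, it can be queried like any hand-built libpgm network. A short sketch using libpgm's TableCPDFactorization for exact inference; the variable names ('Grade', 'Letter') assume the student-network data used in libpgm's examples:

from libpgm.tablecpdfactorization import TableCPDFactorization

# condition on evidence, then query the learned network; condprobve
# returns a factor whose .vals holds the posterior distribution
fn = TableCPDFactorization(result)
query = dict(Grade=['A'])
evidence = dict(Letter='weak')
posterior = fn.condprobve(query, evidence)
print(posterior.vals)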