/
inference.py
286 lines (226 loc) · 13.1 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# inference.py
# ------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import random
import util
from bayesNet import Factor
from factorOperations import joinFactorsByVariableWithCallTracking, joinFactors
from factorOperations import eliminateWithCallTracking, normalize
def inferenceByEnumeration(bayesNet, queryVariables, evidenceDict):
"""
An inference by enumeration implementation provided as reference.
This function performs a probabilistic inference query that
returns the factor:
P(queryVariables | evidenceDict)
bayesNet: The Bayes Net on which we are making a query.
queryVariables: A list of the variables which are unconditioned in
the inference query.
evidenceDict: An assignment dict {variable : value} for the
variables which are presented as evidence
(conditioned) in the inference query.
"""
callTrackingList = []
joinFactorsByVariable = joinFactorsByVariableWithCallTracking(callTrackingList)
eliminate = eliminateWithCallTracking(callTrackingList)
# initialize return variables and the variables to eliminate
evidenceVariablesSet = set(evidenceDict.keys())
queryVariablesSet = set(queryVariables)
eliminationVariables = (bayesNet.variablesSet() - evidenceVariablesSet) - queryVariablesSet
# grab all factors where we know the evidence variables (to reduce the size of the tables)
currentFactorsList = bayesNet.getAllCPTsWithEvidence(evidenceDict)
# join all factors by variable
for joinVariable in bayesNet.variablesSet():
currentFactorsList, joinedFactor = joinFactorsByVariable(currentFactorsList, joinVariable)
currentFactorsList.append(joinedFactor)
# currentFactorsList should contain the connected components of the graph now as factors, must join the connected components
fullJoint = joinFactors(currentFactorsList)
# marginalize all variables that aren't query or evidence
incrementallyMarginalizedJoint = fullJoint
for eliminationVariable in eliminationVariables:
incrementallyMarginalizedJoint = eliminate(incrementallyMarginalizedJoint, eliminationVariable)
fullJointOverQueryAndEvidence = incrementallyMarginalizedJoint
# normalize so that the probability sums to one
# the input factor contains only the query variables and the evidence variables,
# both as unconditioned variables
queryConditionedOnEvidence = normalize(fullJointOverQueryAndEvidence)
# now the factor is conditioned on the evidence variables
# the order is join on all variables, then eliminate on all elimination variables
#print "callTrackingList: ", callTrackingList
return queryConditionedOnEvidence
def inferenceByVariableEliminationWithCallTracking(callTrackingList=None):
def inferenceByVariableElimination(bayesNet, queryVariables, evidenceDict, eliminationOrder):
"""
Question 4: Your inference by variable elimination implementation
This function should perform a probabilistic inference query that
returns the factor:
P(queryVariables | evidenceDict)
It should perform inference by interleaving joining on a variable
and eliminating that variable, in the order of variables according
to eliminationOrder. See inferenceByEnumeration for an example on
how to use these functions.
You need to use joinFactorsByVariable to join all of the factors
that contain a variable in order for the autograder to
recognize that you performed the correct interleaving of
joins and eliminates.
If a factor that you are about to eliminate a variable from has
only one unconditioned variable, you should not eliminate it
and instead just discard the factor. This is since the
result of the eliminate would be 1 (you marginalize
all of the unconditioned variables), but it is not a
valid factor. So this simplifies using the result of eliminate.
The sum of the probabilities should sum to one (so that it is a true
conditional probability, conditioned on the evidence).
bayesNet: The Bayes Net on which we are making a query.
queryVariables: A list of the variables which are unconditioned
in the inference query.
evidenceDict: An assignment dict {variable : value} for the
variables which are presented as evidence
(conditioned) in the inference query.
eliminationOrder: The order to eliminate the variables in.
Hint: BayesNet.getAllCPTsWithEvidence will return all the Conditional
Probability Tables even if an empty dict (or None) is passed in for
evidenceDict. In this case it will not specialize any variable domains
in the CPTs.
Useful functions:
BayesNet.getAllCPTsWithEvidence
normalize
eliminate
joinFactorsByVariable
joinFactors
"""
# this is for autograding -- don't modify
joinFactorsByVariable = joinFactorsByVariableWithCallTracking(callTrackingList)
eliminate = eliminateWithCallTracking(callTrackingList)
if eliminationOrder is None: # set an arbitrary elimination order if None given
eliminationVariables = bayesNet.variablesSet() - set(queryVariables) -\
set(evidenceDict.keys())
eliminationOrder = sorted(list(eliminationVariables))
currentFactorsList = bayesNet.getAllCPTsWithEvidence(evidenceDict)
for variable in eliminationOrder:
currentFactorsList, joinedFactor = joinFactorsByVariable(currentFactorsList,
variable)
if len(joinedFactor.unconditionedVariables()) == 1 and variable in joinedFactor.unconditionedVariables():
continue
else:
newJoinedFactor = eliminate(joinedFactor, variable)
currentFactorsList.append(newJoinedFactor)
fullJoint = joinFactors(currentFactorsList)
queryConditionedOnEvidence = normalize(fullJoint)
return queryConditionedOnEvidence
return inferenceByVariableElimination
inferenceByVariableElimination = inferenceByVariableEliminationWithCallTracking()
def sampleFromFactorRandomSource(randomSource=None):
if randomSource is None:
randomSource = random.Random()
def sampleFromFactor(factor, conditionedAssignments=None):
"""
Sample an assignment for unconditioned variables in factor with
probability equal to the probability in the row of factor
corresponding to that assignment.
factor: The factor to sample from.
conditionedAssignments: A dict of assignments for all conditioned
variables in the factor. Can only be None
if there are no conditioned variables in
factor, otherwise must be nonzero.
Useful for inferenceByLikelihoodWeightingSampling
Returns an assignmentDict that contains the conditionedAssignments but
also a random assignment of the unconditioned variables given their
probability.
"""
if conditionedAssignments is None and len(factor.conditionedVariables()) > 0:
raise ValueError, ("Conditioned assignments must be provided since \n" +
"this factor has conditionedVariables: " + "\n" +
str(factor.conditionedVariables()))
elif conditionedAssignments is not None:
conditionedVariables = set([var for var in conditionedAssignments.keys()])
if not conditionedVariables.issuperset(set(factor.conditionedVariables())):
raise ValueError, ("Factor's conditioned variables need to be a subset of the \n"
+ "conditioned assignments passed in. \n" + \
"conditionedVariables: " + str(conditionedVariables) + "\n" +
"factor.conditionedVariables: " + str(set(factor.conditionedVariables())))
# Reduce the domains of the variables that have been
# conditioned upon for this factor
newVariableDomainsDict = factor.variableDomainsDict()
for (var, assignment) in conditionedAssignments.items():
newVariableDomainsDict[var] = [assignment]
# Get the (hopefully) smaller conditional probability table
# for this variable
CPT = factor.specializeVariableDomains(newVariableDomainsDict)
else:
CPT = factor
# Get the probability of each row of the table (along with the
# assignmentDict that it corresponds to)
assignmentDicts = sorted([assignmentDict for assignmentDict in CPT.getAllPossibleAssignmentDicts()])
assignmentDictProbabilities = [CPT.getProbability(assignmentDict) for assignmentDict in assignmentDicts]
# calculate total probability in the factor and index each row by the
# cumulative sum of probability up to and including that row
currentProbability = 0.0
probabilityRange = []
for i in range(len(assignmentDicts)):
currentProbability += assignmentDictProbabilities[i]
probabilityRange.append(currentProbability)
totalProbability = probabilityRange[-1]
# sample an assignment with probability equal to the probability in the row
# for that assignment in the factor
pick = randomSource.uniform(0.0, totalProbability)
for i in range(len(assignmentDicts)):
if pick <= probabilityRange[i]:
return assignmentDicts[i]
return sampleFromFactor
sampleFromFactor = sampleFromFactorRandomSource()
def inferenceByLikelihoodWeightingSamplingRandomSource(randomSource=None):
def inferenceByLikelihoodWeightingSampling(bayesNet, queryVariables, evidenceDict, numSamples):
"""
Question 6: Inference by likelihood weighted sampling
This function should perform a probabilistic inference query that
returns the factor:
P(queryVariables | evidenceDict)
It should perform inference by performing likelihood weighting
sampling. It should sample numSamples times.
In order for the autograder's solution to match yours,
your outer loop needs to iterate over the number of samples,
with the inner loop sampling from each variable's factor.
Use the ordering of variables provided by BayesNet.linearizeVariables in
your inner loop so that the order of samples matches the autograder's.
There are typically many linearization orders of a directed acyclic
graph (DAG), however we just use a particular one.
The sum of the probabilities should sum to one (so that it is a true
conditional probability, conditioned on the evidence).
bayesNet: The Bayes Net on which we are making a query.
queryVariables: A list of the variables which are unconditioned in
the inference query.
evidenceDict: An assignment dict {variable : value} for the
variables which are presented as evidence
(conditioned) in the inference query.
numSamples: The number of samples that should be taken.
Useful functions:
sampleFromFactor
normalize
BayesNet.getCPT
BayesNet.linearizeVariables
"""
sampleFromFactor = sampleFromFactorRandomSource(randomSource)
currentFactorsList = bayesNet.getAllCPTsWithEvidence(evidenceDict)
newFactor = Factor(queryVariables, evidenceDict.keys(), currentFactorsList[0].variableDomainsDict())
for _ in range(numSamples):
w = 1.0
conditionedAssignments = {}
for variable in bayesNet.linearizeVariables():
if variable in evidenceDict:
conditionedAssignments[variable] = evidenceDict[variable]
w = w * bayesNet.getCPT(variable).getProbability(conditionedAssignments)
else:
conditionedAssignments.update(sampleFromFactor(bayesNet.getCPT(variable), conditionedAssignments))
newFactor.setProbability(conditionedAssignments, w + newFactor.getProbability(conditionedAssignments))
return normalize(newFactor)
return inferenceByLikelihoodWeightingSampling
inferenceByLikelihoodWeightingSampling = inferenceByLikelihoodWeightingSamplingRandomSource()