def transition_model(state):
    """Return the Distribution over next hidden states for `state`.

    `state` is an (x, y, action) triple. Staying put is always possible;
    a move is only considered when it keeps the robot on the grid. Weights
    are assigned unnormalized and the result is renormalized before return.
    """
    x, y, action = state
    was_staying = action == 'stay'
    next_states = Distribution()

    # remaining in place is always an option
    next_states[(x, y, 'stay')] = .2 if was_staying else .1

    # (label, x offset, y offset, does the move stay on the grid?)
    candidate_moves = (
        ('up', 0, -1, y > 0),
        ('down', 0, 1, y < GRID_HEIGHT - 1),
        ('left', -1, 0, x > 0),
        ('right', 1, 0, x < GRID_WIDTH - 1),
    )
    for label, dx, dy, on_grid in candidate_moves:
        if not on_grid:
            continue
        if was_staying:
            # from rest, every legal direction is equally weighted
            next_states[(x + dx, y + dy, label)] = .2
        elif action == label:
            # a moving robot strongly prefers to keep its direction
            next_states[(x + dx, y + dy, label)] = .9

    next_states.renormalize()
    return next_states
def transition_model(state):
    """Build the next-hidden-state Distribution for an (x, y, action) state.

    The robot may always stay where it is. A robot that was staying may
    start moving in any direction that keeps it on the grid; a robot that
    was moving may only continue in the same direction (or stop). The
    weights are renormalized before being returned.
    """
    x, y, action = state
    staying = action == 'stay'
    next_states = Distribution()

    # we can always stay where we are
    if staying:
        next_states[(x, y, 'stay')] = .2
    else:
        next_states[(x, y, 'stay')] = .1

    # gather the neighbouring cells that are inside the grid, in the
    # order up, down, left, right
    neighbours = []
    if y > 0:
        neighbours.append((x, y - 1, 'up'))
    if y < GRID_HEIGHT - 1:
        neighbours.append((x, y + 1, 'down'))
    if x > 0:
        neighbours.append((x - 1, y, 'left'))
    if x < GRID_WIDTH - 1:
        neighbours.append((x + 1, y, 'right'))

    for neighbour in neighbours:
        if staying:
            next_states[neighbour] = .2
        elif action == neighbour[2]:
            next_states[neighbour] = .9

    next_states.renormalize()
    return next_states
def run_experiment(error, alpha, m, iter=1000):
    """Simulate a Markov chain over hypotheses and tally the visited states.

    A true distribution is drawn with `sample_distribution(alpha)` and an
    initial hypothesis with `sample_hypothesis(alpha)`. For `iter` steps
    the next hypothesis is sampled from
    `transition_matrix(h, error, alpha, m)`; progress is printed every 100
    iterations.

    Returns the list `[samples, dist, true_dist]`: every visited hypothesis
    (including the initial one), the normalized visit counts, and the true
    distribution.
    """
    # true distribution
    true_dist = sample_distribution(alpha)
    # empirical distribution (visit counts until normalized)
    visit_counts = Distribution()

    # initialize the chain randomly
    current = sample_hypothesis(alpha)
    chain = [current]
    visit_counts[current] += 1

    print("simulating Markov chain with", iter, "iterations...")
    for step in range(iter):
        if step % 100 == 0:
            print("iteration number:", step)
        # advance the chain by one step
        current = transition_matrix(current, error, alpha, m).sample()
        chain.append(current)
        visit_counts[current] += 1

    visit_counts.normalize()
    return [chain, visit_counts, true_dist]
def uniform_transition_model(state):
    """Return a Distribution weighting every valid next state equally.

    Each state returned by `get_valid_next_states(state)` adds one unit of
    weight (a state listed twice therefore gets twice the weight); the
    result is renormalized before it is returned.
    """
    distribution = Distribution()
    for successor in get_valid_next_states(state):
        distribution[successor] += 1
    distribution.renormalize()
    return distribution
def compute_marginal(message, node_potential):
    """Combine a message and a node potential into a renormalized marginal.

    For every state in `all_possible_hidden_states` that appears in both
    inputs, the two values are multiplied; only strictly positive products
    are stored.
    """
    result = Distribution()
    for state in all_possible_hidden_states:
        if state not in message or state not in node_potential:
            continue
        product = message[state] * node_potential[state]
        if product > 0:  # only store entries with nonzero prob.
            result[state] = product
    result.renormalize()
    return result
def test_factor(self):
    """`factor(key, k)` scales one key's weight before normalization."""

    def letter_index(c):
        # 'a' -> 0, 'b' -> 1, ...
        return ord(c) - ord('a')

    two_way = Distribution({'a': 1, 'b': 2})
    two_way.factor('a', 0.5)
    self.almostEqual(
        list(two_way.as_numpy_array(letter_index)), [0.2, 0.8])

    three_way = Distribution({'a': 1, 'b': 2, 'c': 3})
    three_way.factor('b', 3)
    self.almostEqual(
        list(three_way.as_numpy_array(letter_index)), [0.1, 0.6, 0.3])
def spread_observation_model(state, radius=1):
    """Return the observation Distribution for a hidden state.

    Every grid cell within `radius` of the hidden (x, y) position along
    both axes gets equal weight; cells outside the grid are skipped. The
    result is renormalized.
    """
    x, y, _ = state  # the action component does not affect the observation
    observed_states = Distribution()
    for col in range(x - radius, x + radius + 1):
        for row in range(y - radius, y + radius + 1):
            if 0 <= col <= GRID_WIDTH - 1 and 0 <= row <= GRID_HEIGHT - 1:
                observed_states[(col, row)] = 1.
    observed_states.renormalize()
    return observed_states
def get_transition_mus_and_probs(self, mu, a):
    """Return the possible transitions for action `a` from `mu`.

    Generalized-state counterpart of
    `self.mdp.get_transition_states_and_probs()`. The underlying state is
    extracted from `mu`, the base MDP is queried, and the weight of the
    most likely outcome is rescaled by `self.calibration_factor` via
    `Distribution.factor`. The result is a list of (next_mu, prob) pairs.
    """
    state = self.extract_state_from_mu(mu)
    transitions = self.mdp.get_transition_states_and_probs(state, a)
    likeliest_state, _prob = max(transitions, key=lambda pair: pair[1])
    calibrated = Distribution(dict(transitions))
    calibrated.factor(likeliest_state, self.calibration_factor)
    return list(calibrated.get_dict().items())
def discretized_gaussian_observation_model(state, sigma=1):
    """Return a Gaussian-shaped observation Distribution over the grid.

    Every grid cell (x', y') receives weight
    exp(-((x' - x)^2 + (y' - y)^2) / (2 * sigma)), where (x, y) is the
    position in `state`; the weights are then renormalized. Note that the
    denominator uses `sigma` itself, not `sigma` squared.
    """
    x, y, _ = state  # the action component does not affect the observation
    observed_states = Distribution()
    for x_obs in range(GRID_WIDTH):
        for y_obs in range(GRID_HEIGHT):
            squared_distance = (x_obs - x)**2 + (y_obs - y)**2
            observed_states[(x_obs, y_obs)] = \
                np.exp(-squared_distance / (2.*sigma))
    observed_states.renormalize()
    return observed_states
def initial_distribution():
    """Return the prior Distribution for the initial hidden state.

    The prior is uniform over all grid positions, each paired with the
    'stay' action.
    """
    prior = Distribution()
    num_cells = GRID_WIDTH * GRID_HEIGHT
    for x in range(GRID_WIDTH):
        for y in range(GRID_HEIGHT):
            prior[(x, y, 'stay')] = 1. / num_cells
    return prior
def action(state):
    """Return the action distribution for `state` via `dist_to_numpy`.

    Walls are invalid states and the MDP will refuse to give an action for
    them. However, the VIN's architecture requires an action distribution
    for walls too, so wall cells are hardcoded to always STAY.
    """
    x, y = state
    if mdp.walls[y][x]:
        distribution = Distribution({Direction.STAY: 1})
    else:
        distribution = agent.get_action_distribution(state)
    return dist_to_numpy(distribution)
def get_action_distribution(self, s):
    """Return a Distribution over the actions available in state `s`.

    Note that `s` is a normal state, not a generalized state mu.

    When `self.beta` is None the distribution puts equal weight on every
    action whose Q-value ties for the maximum. Otherwise each action is
    weighted by exp(beta * (Q - mean(Q))).
    """
    mu = self.extend_state_to_mu(s)
    actions = self.mdp.get_actions(s)

    if self.beta is None:
        # Greedy case: collect every action that attains the best Q-value.
        top_value = float("-inf")
        top_actions = []
        for candidate in actions:
            candidate_value = self.qvalue(mu, candidate)
            if candidate_value > top_value:
                top_value = candidate_value
                top_actions = [candidate]
            elif candidate_value == top_value:
                top_actions.append(candidate)
        return Distribution({chosen: 1 for chosen in top_actions})

    # Boltzmann case: weights are exponential in the centered Q-values.
    q_vals = np.array([self.qvalue(mu, a) for a in actions])
    centered = q_vals - np.mean(q_vals)  # To prevent overflow in exp
    weights = np.exp(self.beta * centered)
    return Distribution(dict(zip(actions, weights)))
def chill(show_all=False):
    """Split installed packages into top-level ones and dependencies.

    Walks `pip.get_installed_distributions()`. A package that some other
    package requires is recorded as a dependency (with the set of packages
    that require it); every other package is recorded as a top-level
    distribution. Unless `show_all` is true, a fixed set of tooling
    packages is ignored.

    Returns a pair `(distributions, dependencies)`, each a sorted list of
    `Distribution` objects.
    """
    ignored_packages = () if show_all else {
        'pip', 'pip-chill', 'wheel', 'setuptools', 'pkg-resources'
    }

    # Gather all packages that are requirements and will be auto-installed.
    distributions = {}
    dependencies = {}

    for installed in pip.get_installed_distributions():
        name = installed.key
        if name in ignored_packages:
            continue

        if name in dependencies:
            # Already seen as somebody's requirement: just fill in version.
            dependencies[name].version = installed.version
        else:
            distributions[name] = Distribution(name, installed.version)

        for requirement in installed.requires():
            required = requirement.key
            if required not in ignored_packages:
                if required in dependencies:
                    dependencies[required].required_by.add(name)
                else:
                    dependencies[required] = Distribution(
                        required, required_by=(name, ))

            if required in distributions:
                # It was listed as top-level earlier; demote it and carry
                # its version over to the dependency record.
                dependencies[required].version \
                    = distributions.pop(required).version

    return sorted(distributions.values()), sorted(dependencies.values())
def compute_marginal(particles, weights):
    """
    Essentially computes an *empirical* distribution given particles and
    weights

    Inputs
    ------
    particles: a list where each element is a hidden state value

    weights: a list where element i is the weight for particle i

    Output
    ------
    A Distribution, where each hidden state has probability proportional to
    the total weight for that hidden state (which may be from multiple
    particles)

    Raises
    ------
    ValueError: if `particles` and `weights` differ in length
    """
    if len(particles) != len(weights):
        # zip() would silently drop the unmatched tail
        raise ValueError("particles and weights must have the same length")

    marginal = Distribution()
    for particle, weight in zip(particles, weights):
        # several particles may share one hidden state: accumulate them
        if particle in marginal:
            marginal[particle] += weight
        else:
            marginal[particle] = weight

    # NOTE(review): renormalize() behaviour on an empty or zero-total
    # Distribution is not visible here -- confirm for empty particle lists.
    marginal.renormalize()
    return marginal
def test_equality(self):
    """Equality ignores overall scale and zero-weight keys."""
    # Same proportions, different scale.
    halves = Distribution({'a': 0.5, 'b': 0.5})
    ones = Distribution({'a': 1, 'b': 1})
    self.assertEqual(halves, ones)

    # Keys with zero weight do not affect equality.
    with_zero_c = Distribution({'a': 0.5, 'b': 0.5, 'c': 0})
    with_zero_d = Distribution({'a': 0.5, 'b': 0.5, 'd': 0})
    self.assertEqual(with_zero_c, with_zero_d)

    # Different support means different distributions.
    two_keys = Distribution({'a': 1, 'b': 1})
    one_key = Distribution({'a': 1})
    self.assertNotEqual(two_keys, one_key)
def forward_backward(
    all_possible_hidden_states,
    all_possible_observed_states,
    prior_distribution,
    transition_model,
    observation_model,
    observations,
):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    all_possible_observed_states: a list of possible observed states
        (not used by this function)

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    A tuple (marginals, pairwise_marginals). marginals is a list with one
    Distribution per time step, the i-th one being the marginal for time
    step i. pairwise_marginals is a placeholder list of None with
    len(observations) - 1 entries; it is not computed yet.
    """
    num_time_steps = len(observations)

    # -------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            # the first node also absorbs the prior
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.0
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        # Adjacent literals (rather than a backslash inside the string)
        # keep source indentation out of the message.
        assert len(potential.keys()) > 0, (
            "Invalid observation at time %d. Maybe you "
            "forgot the --use-spread-output argument?" % n
        )
        phis.append(potential)

    # we need not recompute edge potentials since they're given by the
    # transition model: phi(x_i, x_j) = transition_model[x_i](x_j),
    # where j = i+1

    # -------------------------------------------------------------------------
    # Forward pass
    #
    forward_messages = []

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.0
    message.renormalize()
    forward_messages.append(message)

    for n in range(num_time_steps - 1):
        # compute message from node n to node n+1
        message = Distribution()

        # 1. only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            # query the transition model once per source state instead of
            # once per (state, next state) pair
            transitions = transition_model(hidden_state)
            # 2. only loop over possible next hidden states given current
            #    hidden state
            for next_hidden_state in transitions:
                factor = (
                    phis[n][hidden_state]
                    * transitions[next_hidden_state]
                    * forward_messages[-1][hidden_state]
                )
                if factor > 0:  # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

        message.renormalize()
        forward_messages.append(message)

    # -------------------------------------------------------------------------
    # Pre-processing to speed up the backward pass: cache for each hidden
    # state its outgoing transitions and what the possible previous hidden
    # states are
    #
    transitions_from = {}
    possible_prev_hidden_states = {}
    for hidden_state in all_possible_hidden_states:
        transitions = transition_model(hidden_state)
        transitions_from[hidden_state] = transitions
        for next_hidden_state in transitions:
            if next_hidden_state in possible_prev_hidden_states:
                possible_prev_hidden_states[next_hidden_state].add(hidden_state)
            else:
                possible_prev_hidden_states[next_hidden_state] = {hidden_state}

    # -------------------------------------------------------------------------
    # Backward pass
    #
    # Messages are collected last-to-first and reversed once at the end.
    backward_messages = []

    # compute message from non-existent node <num_time_steps> to node
    # <num_time_steps>-1
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.0
    message.renormalize()
    backward_messages.append(message)

    for n in range(num_time_steps - 2, -1, -1):
        # compute message from node n+1 to n
        message = Distribution()
        next_message = backward_messages[-1]

        # 1. only loop over next hidden states with nonzero potential!
        for next_hidden_state in phis[n + 1]:
            # 2. only loop over possible previous hidden states; a state
            #    that nothing transitions into contributes nothing (and
            #    must not raise KeyError)
            predecessors = possible_prev_hidden_states.get(
                next_hidden_state, ())
            for hidden_state in predecessors:
                factor = (
                    phis[n + 1][next_hidden_state]
                    * transitions_from[hidden_state][next_hidden_state]
                    * next_message[next_hidden_state]
                )
                if factor > 0:  # only store entries with nonzero prob.
                    if hidden_state in message:
                        message[hidden_state] += factor
                    else:
                        message[hidden_state] = factor

        message.renormalize()
        backward_messages.append(message)

    backward_messages.reverse()  # backward_messages[n] now belongs to node n

    # -------------------------------------------------------------------------
    # Compute marginals
    #
    marginals = []
    for n in range(num_time_steps):
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if (hidden_state in forward_messages[n]
                    and hidden_state in backward_messages[n]
                    and hidden_state in phis[n]):
                value = (
                    forward_messages[n][hidden_state]
                    * backward_messages[n][hidden_state]
                    * phis[n][hidden_state]
                )
                if value > 0:  # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        marginals.append(marginal)

    # TODO: estimate pairwise marginals; for now this is a placeholder
    pairwise_marginals = [None] * (num_time_steps - 1)

    return (marginals, pairwise_marginals)
def forward(all_possible_hidden_states, prior_distribution, transition_model,
            observation_model, observations):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    This function is a Python generator! It calculates outputs "on demand";
    the i-th output is the (forward-only) marginal distribution for time
    step i. With no observations it yields nothing.
    """
    num_time_steps = len(observations)
    if num_time_steps == 0:
        # nothing to filter; avoids an IndexError on observations[0]
        return

    def compute_marginal(message, node_potential):
        # product of message and potential, restricted to states in both
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if hidden_state in message and \
               hidden_state in node_potential:
                value = message[hidden_state] * node_potential[hidden_state]
                if value > 0:  # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        return marginal

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()

    # compute node potential for time step 0 (prior times observation)
    node_potential = Distribution()
    observed_state = observations[0]
    for hidden_state in prior_distribution:
        value = prior_distribution[hidden_state]
        if observed_state is not None:
            value *= observation_model(hidden_state)[observed_state]
        if value > 0:
            node_potential[hidden_state] = value

    yield compute_marginal(message, node_potential)

    prev_message = message
    prev_node_potential = node_potential

    for n in range(1, num_time_steps):
        message = Distribution()
        node_potential = Distribution()
        observed_state = observations[n]

        # compute message from node n-1 to n and fill in node potential for
        # time step n
        for hidden_state in all_possible_hidden_states:
            # query the transition model once per source state instead of
            # once per (state, next state) pair
            transitions = transition_model(hidden_state)
            # only loop over possible next hidden states given current
            # hidden state
            for next_hidden_state in transitions:
                factor = prev_node_potential[hidden_state] * \
                    transitions[next_hidden_state] * \
                    prev_message[hidden_state]
                if factor > 0:  # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

            if observed_state is not None:
                value = observation_model(hidden_state)[observed_state]
                if value > 0:
                    node_potential[hidden_state] = value
            else:
                # a missing observation carries no information
                node_potential[hidden_state] = 1.

        message.renormalize()

        yield compute_marginal(message, node_potential)

        prev_message = message
        prev_node_potential = node_potential
def test_as_numpy_array(self):
    """`as_numpy_array` places each key's probability at its index."""

    def letter_index(c):
        # 'a' -> 0, 'b' -> 1, ...
        return ord(c) - ord('a')

    dist = Distribution({'a': 1, 'b': 2, 'd': 2})

    # Without an explicit length there are four slots; 'c' is absent.
    default_length = list(dist.as_numpy_array(letter_index))
    self.almostEqual(default_length, [0.2, 0.4, 0, 0.4])

    # With a length of 5 there is one extra trailing zero.
    padded = list(dist.as_numpy_array(letter_index, 5))
    self.almostEqual(padded, [0.2, 0.4, 0, 0.4, 0])
def test_sample(self):
    """Both equally weighted outcomes show up when sampling 200 times."""
    dist = Distribution({'a': 1, 'b': 1})
    samples = []
    for _ in range(200):
        samples.append(dist.sample())
    for outcome in ('a', 'b'):
        self.assertTrue(samples.count(outcome) > 10)
def Viterbi(
    all_possible_hidden_states,
    all_possible_observed_states,
    prior_distribution,
    transition_model,
    observation_model,
    observations,
):
    """
    Inputs
    ------
    See the list of inputs for the function forward_backward() above.
    all_possible_observed_states is not used by this function.

    Output
    ------
    A list of estimated hidden states (one per observation), each encoded
    as a tuple (<x>, <y>, <action>). An empty observation list gives an
    empty result.
    """
    num_time_steps = len(observations)
    if num_time_steps == 0:
        # nothing to decode; avoids indexing phis[0] on an empty list
        return []

    # Below is an implementation of the Min-Sum algorithm specialized to
    # the HMM case: messages hold negative log-probabilities, so smaller
    # is better.
    messages = []  # best values so far
    back_pointers = []  # back-pointers for best values so far

    # -------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            # the first node also absorbs the prior
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.0
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        # Adjacent literals (rather than a backslash inside the string)
        # keep source indentation out of the message.
        assert len(potential.keys()) > 0, (
            "Invalid observation at time %d. Maybe you "
            "forgot the --use-spread-output argument?" % n
        )
        phis.append(potential)

    # -------------------------------------------------------------------------
    # Forward pass
    #

    # handle initial time step differently
    initial_message = {}
    for hidden_state in prior_distribution:
        value = -careful_log(phis[0][hidden_state])
        if value < float("inf"):  # only store entries with nonzero prob.
            initial_message[hidden_state] = value
    messages.append(initial_message)

    # rest of the time steps
    for n in range(1, num_time_steps):
        prev_message = messages[-1]
        # query the transition model once per previous state instead of
        # once per (previous state, state) pair
        transitions_from = {
            prev_hidden_state: transition_model(prev_hidden_state)
            for prev_hidden_state in prev_message
        }
        new_message = {}
        new_back_pointer = {}

        # only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            log_potential = careful_log(phis[n][hidden_state])
            best_prev_hidden_state = None
            best_value = float("inf")
            for prev_hidden_state, prev_value in prev_message.items():
                value = (
                    prev_value
                    - careful_log(
                        transitions_from[prev_hidden_state][hidden_state])
                    - log_potential
                )
                # the strict comparison keeps the first minimum and also
                # skips infinite (zero-probability) candidates
                if value < best_value:
                    best_value = value
                    best_prev_hidden_state = prev_hidden_state

            if best_value < float("inf"):
                new_message[hidden_state] = best_value
                new_back_pointer[hidden_state] = best_prev_hidden_state

        messages.append(new_message)
        back_pointers.append(new_back_pointer)

    # -------------------------------------------------------------------------
    # Backward pass (follow back-pointers)
    #

    # handle last time step differently: pick the cheapest final state
    last_message = messages[-1]
    minimum = float("inf")
    arg_min = None  # stays None if no final state has nonzero probability
    for hidden_state in last_message:
        if last_message[hidden_state] < minimum:
            minimum = last_message[hidden_state]
            arg_min = hidden_state

    # collect the path last-to-first, then reverse it once
    reversed_path = [arg_min]
    for n in range(num_time_steps - 2, -1, -1):
        reversed_path.append(back_pointers[n][reversed_path[-1]])
    reversed_path.reverse()

    return reversed_path
def second_best(all_possible_hidden_states, all_possible_observed_states,
                prior_distribution, transition_model, observation_model,
                observations):
    """
    Inputs
    ------
    See the list of inputs for the function forward_backward() above.
    all_possible_observed_states is not used by this function.

    Output
    ------
    A list of estimated hidden states (one per observation) for the
    second-best sequence, each encoded as a tuple (<x>, <y>, <action>).
    If there is no second-best sequence, the list holds None for every
    time step. An empty observation list gives an empty result.
    """
    num_time_steps = len(observations)
    if num_time_steps == 0:
        # nothing to decode; avoids indexing phis[0] on an empty list
        return []

    infinity = float('inf')

    # Basically for each (possible) hidden state at time step i, we need to
    # keep track of the best previous hidden state AND the second best
    # previous hidden state--where we need to keep track of TWO back
    # pointers per (possible) hidden state at each time step!
    messages = []  # best values so far
    messages2 = []  # second-best values so far
    back_pointers = []  # per time step per hidden state, we now need
                        # *two* back-pointers

    #-------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            # the first node also absorbs the prior
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        phis.append(potential)

    #-------------------------------------------------------------------------
    # Forward pass
    #

    # handle initial time step differently
    initial_message = {}
    for hidden_state in prior_distribution:
        value = -careful_log(phis[0][hidden_state])
        if value < infinity:  # only store entries with nonzero prob.
            initial_message[hidden_state] = value
    messages.append(initial_message)
    messages2.append({})  # there is no second-best option yet

    # rest of the time steps
    for n in range(1, num_time_steps):
        # index 0 = best values, index 1 = second-best values; the index
        # doubles as the "which back pointer we followed" marker
        prev_tables = (messages[-1], messages2[-1])

        # query the transition model once per previous state instead of
        # once per (previous state, state) pair
        transitions_from = {}
        for table in prev_tables:
            for prev_hidden_state in table:
                if prev_hidden_state not in transitions_from:
                    transitions_from[prev_hidden_state] = \
                        transition_model(prev_hidden_state)

        new_message = {}
        new_message2 = {}
        new_back_pointers = {}  # need to store 2 per possible hidden state

        # only look at possible hidden states given observation
        for hidden_state in phis[n]:
            log_potential = careful_log(phis[n][hidden_state])

            # each entry in values is a tuple of the form:
            # (<value>, <previous hidden state>,
            #  <which back pointer we followed>)
            values = []
            for which, table in enumerate(prev_tables):
                for prev_hidden_state, prev_value in table.items():
                    value = prev_value - \
                        careful_log(
                            transitions_from[prev_hidden_state][
                                hidden_state]) - \
                        log_potential
                    if value < infinity:  # only keep nonzero prob.
                        values.append((value, prev_hidden_state, which))

            if len(values) > 0:
                # this part could actually be sped up by not using a
                # sorting algorithm...
                sorted_values = sorted(values, key=lambda entry: entry[0])
                best_value, best_prev_hidden_state, which_back_pointer = \
                    sorted_values[0]

                if len(values) > 1:
                    best_value2, best_prev_hidden_state2, \
                        which_back_pointer2 = sorted_values[1]
                else:
                    # placeholder: no second-best way to reach this state
                    best_value2 = infinity
                    best_prev_hidden_state2 = None
                    which_back_pointer2 = None

                new_message[hidden_state] = best_value
                new_message2[hidden_state] = best_value2
                new_back_pointers[hidden_state] = (
                    (best_prev_hidden_state, which_back_pointer),
                    (best_prev_hidden_state2, which_back_pointer2),
                )

        messages.append(new_message)
        messages2.append(new_message2)
        back_pointers.append(new_back_pointers)

    #-------------------------------------------------------------------------
    # Backward pass (follow back-pointers)
    #

    # handle last time step differently
    values = []
    for which, table in enumerate((messages[-1], messages2[-1])):
        # .items() instead of the Python-2-only .iteritems()
        for hidden_state, value in table.items():
            # infinite entries are "no second-best" placeholders whose back
            # pointers are None; they must never be selected
            if value < infinity:
                values.append((value, hidden_state, which))

    if len(values) <= 1:
        # this happens if there isn't a second best option, which should
        # mean that the only possible option (the MAP estimate) is the
        # only solution with 0 error
        return [None] * num_time_steps

    # this part could actually be sped up by not using a sorting
    # algorithm...
    sorted_values = sorted(values, key=lambda entry: entry[0])
    _second_best_value, hidden_state, which_back_pointer = sorted_values[1]

    # collect the path last-to-first, then reverse it once
    reversed_path = [hidden_state]
    for n in range(num_time_steps - 2, -1, -1):
        hidden_state, which_back_pointer = \
            back_pointers[n][hidden_state][which_back_pointer]
        reversed_path.append(hidden_state)
    reversed_path.reverse()

    return reversed_path
def forward_backward(all_possible_hidden_states,
                     all_possible_observed_states,
                     prior_distribution,
                     transition_model,
                     observation_model,
                     observations):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    all_possible_observed_states: a list of possible observed states
        (not used by this function)

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    A tuple (marginals, pairwise_marginals). marginals is a list with one
    Distribution per time step, the i-th one being the marginal for time
    step i. pairwise_marginals is a placeholder list of None with
    len(observations) - 1 entries; it is not computed yet.
    """
    num_time_steps = len(observations)

    #-------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            # the first node also absorbs the prior
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        # Adjacent literals (rather than a backslash inside the string)
        # keep source indentation out of the message.
        assert len(potential.keys()) > 0, (
            "Invalid observation at time %d. Maybe you "
            "forgot the --use-spread-output argument?" % n
        )
        phis.append(potential)

    # we need not recompute edge potentials since they're given by the
    # transition model: phi(x_i, x_j) = transition_model[x_i](x_j),
    # where j = i+1

    #-------------------------------------------------------------------------
    # Forward pass
    #
    forward_messages = []

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()
    forward_messages.append(message)

    for n in range(num_time_steps - 1):
        # compute message from node n to node n+1
        message = Distribution()

        # 1. only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            # query the transition model once per source state instead of
            # once per (state, next state) pair
            transitions = transition_model(hidden_state)
            # 2. only loop over possible next hidden states given current
            #    hidden state
            for next_hidden_state in transitions:
                factor = phis[n][hidden_state] * \
                    transitions[next_hidden_state] * \
                    forward_messages[-1][hidden_state]
                if factor > 0:  # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

        message.renormalize()
        forward_messages.append(message)

    #-------------------------------------------------------------------------
    # Pre-processing to speed up the backward pass: cache for each hidden
    # state its outgoing transitions and what the possible previous hidden
    # states are
    #
    transitions_from = {}
    possible_prev_hidden_states = {}
    for hidden_state in all_possible_hidden_states:
        transitions = transition_model(hidden_state)
        transitions_from[hidden_state] = transitions
        for next_hidden_state in transitions:
            if next_hidden_state in possible_prev_hidden_states:
                possible_prev_hidden_states[next_hidden_state].add(
                    hidden_state)
            else:
                possible_prev_hidden_states[next_hidden_state] = \
                    {hidden_state}

    #-------------------------------------------------------------------------
    # Backward pass
    #
    # Messages are collected last-to-first and reversed once at the end.
    backward_messages = []

    # compute message from non-existent node <num_time_steps> to node
    # <num_time_steps>-1
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()
    backward_messages.append(message)

    for n in range(num_time_steps - 2, -1, -1):
        # compute message from node n+1 to n
        message = Distribution()
        next_message = backward_messages[-1]

        # 1. only loop over next hidden states with nonzero potential!
        for next_hidden_state in phis[n + 1]:
            # 2. only loop over possible previous hidden states; a state
            #    that nothing transitions into contributes nothing (and
            #    must not raise KeyError)
            predecessors = possible_prev_hidden_states.get(
                next_hidden_state, ())
            for hidden_state in predecessors:
                factor = phis[n + 1][next_hidden_state] * \
                    transitions_from[hidden_state][next_hidden_state] * \
                    next_message[next_hidden_state]
                if factor > 0:  # only store entries with nonzero prob.
                    if hidden_state in message:
                        message[hidden_state] += factor
                    else:
                        message[hidden_state] = factor

        message.renormalize()
        backward_messages.append(message)

    backward_messages.reverse()  # backward_messages[n] now belongs to node n

    #-------------------------------------------------------------------------
    # Compute marginals
    #
    marginals = []
    for n in range(num_time_steps):
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if hidden_state in forward_messages[n] and \
               hidden_state in backward_messages[n] and \
               hidden_state in phis[n]:
                value = forward_messages[n][hidden_state] * \
                    backward_messages[n][hidden_state] * \
                    phis[n][hidden_state]
                if value > 0:  # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        marginals.append(marginal)

    # TODO: estimate pairwise marginals; for now this is a placeholder
    pairwise_marginals = [None] * (num_time_steps - 1)

    return (marginals, pairwise_marginals)
def optimal_agent_test(self, agent):
    """Check an optimal agent's action distributions, values and rollouts.

    The checks run in three stages on the same small gridworld:

    1. the supplied ``agent`` with living reward -0.1, rolled out with
       gamma=0.95;
    2. a fresh ``agents.OptimalAgent(gamma=0.5, num_iters=20)`` with living
       reward -0.001;
    3. a Boltzmann-rational ``agents.OptimalAgent(beta=1, ...)`` on the
       second MDP, where only the ordering of action probabilities is
       checked.

    Parameters
    ----------
    agent : the agent under test for stage 1. It is replaced by freshly
        constructed ``OptimalAgent`` instances for stages 2 and 3.
    """
    # Every row must be 9 characters wide so the grid is rectangular.
    # NOTE(review): presumably 'X' is a wall, digits are rewards and 'A' is
    # the start cell -- confirm against GridworldMdp.
    grid = [
        'XXXXXXXXX',
        'X9X6XA  X',
        'X X X XXX',
        'X      2X',
        'XXXXXXXXX',
    ]
    n, s, e, w, stay = self.all_actions

    # ---- Stage 1: supplied agent, living reward -0.1 ---------------------
    mdp = GridworldMdp(grid, living_reward=-0.1)
    env = Mdp(mdp)
    agent.set_mdp(mdp)
    start_state = mdp.get_start_state()

    # Action distribution
    action_dist = agent.get_action_distribution(start_state)
    self.assertEqual(action_dist, Distribution({s: 1}))

    # Trajectory
    actions, _ = self.run_on_env(agent, env, gamma=0.95, episode_length=10)
    self.assertEqual(actions, [s, s, w, w, w, w, n, n, stay, stay])

    # ---- Stage 2: heavier discounting (gamma=0.5 instead of 0.95) --------
    mdp = GridworldMdp(grid, living_reward=-0.001)
    env = Mdp(mdp)
    agent = agents.OptimalAgent(gamma=0.5, num_iters=20)
    agent.set_mdp(mdp)
    start_state = mdp.get_start_state()

    # Values
    # The expected number ignores the living reward and value iteration only
    # runs for 20 iterations, so only compare to 2 decimal places.
    self.assertAlmostEqual(agent.value(start_state), 0.25, places=2)

    # Action distribution
    action_dist = agent.get_action_distribution(start_state)
    self.assertEqual(action_dist, Distribution({s: 1}))

    # Trajectory
    actions, reward = self.run_on_env(
        agent, env, gamma=0.5, episode_length=10)
    # Again an approximate comparison since living rewards are not included
    # in the expected value.
    self.assertAlmostEqual(reward, (4 - 0.0625) / 16, places=2)
    self.assertEqual(
        actions, [s, s, e, e, stay, stay, stay, stay, stay, stay])

    # ---- Stage 3: Boltzmann rationality (beta=1) -------------------------
    agent = agents.OptimalAgent(beta=1, gamma=0.5, num_iters=20)
    agent.set_mdp(mdp)

    # Action distribution at the start state: every move keeps some
    # probability mass, and only the relative ordering is pinned.
    dist = agent.get_action_distribution(start_state).get_dict()
    nprob, sprob, eprob, wprob = dist[n], dist[s], dist[e], dist[w]
    for p in [nprob, sprob, eprob, wprob]:
        self.assertGreater(p, 0)
        self.assertLess(p, 1)
    self.assertEqual(nprob, wprob)
    self.assertGreater(sprob, nprob)
    self.assertGreater(nprob, eprob)

    # Same ordering checks at a cell in the bottom corridor.
    middle_state = (2, 3)
    dist = agent.get_action_distribution(middle_state).get_dict()
    nprob, sprob, eprob, wprob = dist[n], dist[s], dist[e], dist[w]
    for p in [nprob, sprob, eprob, wprob]:
        self.assertGreater(p, 0)
        self.assertLess(p, 1)
    self.assertEqual(nprob, sprob)
    self.assertGreater(wprob, eprob)
    self.assertGreater(eprob, nprob)
def Viterbi(all_possible_hidden_states,
            all_possible_observed_states,
            prior_distribution,
            transition_model,
            observation_model,
            observations):
    """
    Find the most likely sequence of hidden states given the observations.

    Inputs
    ------
    See the list of inputs for the function forward_backward() above.
    As used by this function:

    all_possible_hidden_states: iterable of hidden states; candidate states
        for every time step after the first
    all_possible_observed_states: not used by this function
    prior_distribution: maps each hidden state to its prior probability;
        defines the candidate states for the first time step
    transition_model: function taking a hidden state and returning a mapping
        from next hidden state to probability
    observation_model: function taking a hidden state and returning a mapping
        from observed state to probability
    observations: sequence of observed states, one per time step; an entry of
        None marks a missing observation

    Output
    ------
    A list of estimated hidden states, each encoded as a tuple
    (<x>, <y>, <action>). The list has one entry per observation and is
    empty when `observations` is empty.
    NOTE(review): if no hidden-state sequence has nonzero probability, the
    last time step has no candidates; a one-step input then yields [None]
    and a longer input fails while following back-pointers.
    """
    num_time_steps = len(observations)
    if num_time_steps == 0:
        # no time steps means there is no chain to decode
        return []

    infinity = float('inf')

    # Below is an implementation of the Min-Sum algorithm specialized to the
    # HMM case: costs are negative log probabilities, so the cheapest path is
    # the most probable one.
    messages = []       # best values so far
    back_pointers = []  # back-pointers for best values so far

    #-------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n, observed_state in enumerate(observations):
        potential = Distribution()
        if n == 0:
            # the first node also absorbs the prior
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value

        # Adjacent literals are joined at compile time, so the message
        # contains no stray source indentation.
        assert len(potential.keys()) > 0, (
            "Invalid observation at time %d. Maybe you "
            "forgot the --use-spread-output argument?" % n)
        phis.append(potential)

    #-------------------------------------------------------------------------
    # Forward pass
    #
    # handle initial time step differently
    initial_message = {}
    for hidden_state in prior_distribution:
        # states dropped from phis[0] have zero probability; skip them
        # instead of indexing a missing key
        if hidden_state not in phis[0]:
            continue
        value = -careful_log(phis[0][hidden_state])
        if value < infinity:  # only store entries with nonzero prob.
            initial_message[hidden_state] = value
    messages.append(initial_message)

    # rest of the time steps
    for n in range(1, num_time_steps):
        prev_message = messages[-1]
        # Call the transition model once per predecessor for this step
        # instead of once per (state, predecessor) pair.
        transitions = {prev_hidden_state: transition_model(prev_hidden_state)
                       for prev_hidden_state in prev_message}
        new_message = {}
        new_back_pointer = {}

        # only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            node_log = careful_log(phis[n][hidden_state])
            best_prev_hidden_state = None
            best_value = infinity
            for prev_hidden_state in prev_message:
                value = prev_message[prev_hidden_state] \
                    - careful_log(
                        transitions[prev_hidden_state][hidden_state]) \
                    - node_log
                # the strict comparison drops infinite-cost (zero
                # probability) paths and keeps the first of any tied minima
                if value < best_value:
                    best_value = value
                    best_prev_hidden_state = prev_hidden_state

            if best_value < infinity:  # at least one feasible predecessor
                new_message[hidden_state] = best_value
                new_back_pointer[hidden_state] = best_prev_hidden_state

        messages.append(new_message)
        back_pointers.append(new_back_pointer)

    #-------------------------------------------------------------------------
    # Backward pass (follow back-pointers)
    #
    # handle last time step differently: pick the cheapest final state
    last_message = messages[-1]
    if last_message:
        arg_min = min(last_message, key=last_message.get)
    else:
        arg_min = None

    # Collect states from the last time step to the first, then flip once;
    # back_pointers[n] maps a state at time n+1 to its best predecessor.
    estimated_hidden_states = [arg_min]
    for next_back_pointers in reversed(back_pointers):
        estimated_hidden_states.append(
            next_back_pointers[estimated_hidden_states[-1]])
    estimated_hidden_states.reverse()

    return estimated_hidden_states