Example #1
def transition_model(state):
    # given a hidden state, return the Distribution for the next hidden state
    x, y, action = state
    next_states = Distribution()

    # we can always stay where we are
    if action == 'stay':
        next_states[(x, y, 'stay')] = .2
    else:
        next_states[(x, y, 'stay')] = .1

    if y > 0:  # we can go up
        if action == 'stay':
            next_states[(x, y - 1, 'up')] = .2
        if action == 'up':
            next_states[(x, y - 1, 'up')] = .9
    if y < GRID_HEIGHT - 1:  # we can go down
        if action == 'stay':
            next_states[(x, y + 1, 'down')] = .2
        if action == 'down':
            next_states[(x, y + 1, 'down')] = .9
    if x > 0:  # we can go left
        if action == 'stay':
            next_states[(x - 1, y, 'left')] = .2
        if action == 'left':
            next_states[(x - 1, y, 'left')] = .9
    if x < GRID_WIDTH - 1:  # we can go right
        if action == 'stay':
            next_states[(x + 1, y, 'right')] = .2
        if action == 'right':
            next_states[(x + 1, y, 'right')] = .9

    next_states.renormalize()
    return next_states
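Every snippet on this page leans on a small dict-like Distribution helper. Purely as a point of reference, here is a minimal sketch of the kind of class these examples assume, with the default-zero lookup, renormalize() and sample() behaviour the code relies on; the real classes in these projects may differ in their details.

import random


class Distribution(dict):
    """Minimal sketch of the dict-like Distribution these snippets assume."""

    def __missing__(self, key):
        # unseen outcomes default to weight 0, so `dist[key] += w` just works
        return 0.

    def renormalize(self):
        # rescale all weights so they sum to 1
        total = sum(self.values())
        for key in self:
            self[key] /= total

    def sample(self):
        # draw one outcome with probability proportional to its weight
        target = random.uniform(0, sum(self.values()))
        cumulative = 0.
        for key, weight in self.items():
            cumulative += weight
            if cumulative >= target:
                return key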
Example #2
def transition_model(state):
    # given a hidden state, return the Distribution for the next hidden state
    x, y, action = state
    next_states  = Distribution()

    # we can always stay where we are
    if action == 'stay':
        next_states[(x, y, 'stay')] = .2
    else:
        next_states[(x, y, 'stay')] = .1

    if y > 0: # we can go up
        if action == 'stay':
            next_states[(x, y-1, 'up')] = .2
        if action == 'up':
            next_states[(x, y-1, 'up')] = .9
    if y < GRID_HEIGHT - 1: # we can go down
        if action == 'stay':
            next_states[(x, y+1, 'down')] = .2
        if action == 'down':
            next_states[(x, y+1, 'down')] = .9
    if x > 0: # we can go left
        if action == 'stay':
            next_states[(x-1, y, 'left')] = .2
        if action == 'left':
            next_states[(x-1, y, 'left')] = .9
    if x < GRID_WIDTH - 1: # we can go right
        if action == 'stay':
            next_states[(x+1, y, 'right')] = .2
        if action == 'right':
            next_states[(x+1, y, 'right')] = .9

    next_states.renormalize()
    return next_states
Example #3
def run_experiment(error, alpha, m, iter=1000):
    samples = []

    # true distribution
    true_dist = sample_distribution(alpha)

    # empirical distribution
    dist = Distribution()

    # initialize h1 randomly
    h = sample_hypothesis(alpha)  # [hypothesis, is_comp]
    samples.append(h)
    dist[h] += 1

    print("simulating Markov chain with", iter, "iterations...")
    for i in range(iter):
        if i % 100 == 0:
            print("iteration number:", i)

        # run Markov chain
        tm = transition_matrix(h, error, alpha, m)
        h_next = tm.sample()
        samples.append(h_next)
        dist[h_next] += 1
        h = h_next

    dist.normalize()

    return [samples, dist, true_dist]
Example #4
def uniform_transition_model(state):
    next_state_distribution = Distribution()
    valid_next_states = get_valid_next_states(state)
    for next_state in valid_next_states:
        next_state_distribution[next_state] += 1
    next_state_distribution.renormalize()

    return next_state_distribution
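get_valid_next_states comes from elsewhere in that project and is not shown here. A hypothetical version for the grid-robot state space of the earlier examples might look like the following (GRID_WIDTH and GRID_HEIGHT are the same assumed module constants):

def get_valid_next_states(state):
    # Hypothetical helper: enumerate the neighboring (x, y, action) states
    # that stay inside the grid, mirroring the moves in transition_model above.
    x, y, action = state
    next_states = [(x, y, 'stay')]
    if y > 0:
        next_states.append((x, y - 1, 'up'))
    if y < GRID_HEIGHT - 1:
        next_states.append((x, y + 1, 'down'))
    if x > 0:
        next_states.append((x - 1, y, 'left'))
    if x < GRID_WIDTH - 1:
        next_states.append((x + 1, y, 'right'))
    return next_states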
Example #5
def uniform_transition_model(state):
    next_state_distribution = Distribution()
    valid_next_states = get_valid_next_states(state)
    for next_state in valid_next_states:
        next_state_distribution[next_state] += 1
    next_state_distribution.renormalize()

    return next_state_distribution
Example #6
def compute_marginal(message, node_potential):
    marginal = Distribution()
    for hidden_state in all_possible_hidden_states:
        if hidden_state in message and \
           hidden_state in node_potential:
            value = message[hidden_state] * node_potential[hidden_state]
            if value > 0:  # only store entries with nonzero prob.
                marginal[hidden_state] = value
    marginal.renormalize()
    return marginal
Example #7
def compute_marginal(message, node_potential):
    marginal = Distribution()
    for hidden_state in all_possible_hidden_states:
        if hidden_state in message and \
           hidden_state in node_potential:
            value = message[hidden_state] * node_potential[hidden_state]
            if value > 0:  # only store entries with nonzero prob.
                marginal[hidden_state] = value
    marginal.renormalize()
    return marginal
Example #8
    def test_factor(self):
        fn = lambda c: ord(c) - ord('a')

        dist = Distribution({'a': 1, 'b': 2})
        dist.factor('a', 0.5)
        self.almostEqual(list(dist.as_numpy_array(fn)), [0.2, 0.8])

        dist = Distribution({'a': 1, 'b': 2, 'c': 3})
        dist.factor('b', 3)
        self.almostEqual(list(dist.as_numpy_array(fn)), [0.1, 0.6, 0.3])
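The expected arrays pin down what factor has to do: scale one outcome's weight and renormalize (1 * 0.5 and 2 become 0.2 and 0.8). A minimal sketch of such a Distribution method, not necessarily the project's actual implementation:

def factor(self, key, weight):
    # sketch of a Distribution method: scale one outcome's weight,
    # then renormalize so all weights sum to 1
    self[key] = self.get(key, 0) * weight
    total = sum(self.values())
    for k in self:
        self[k] /= total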
Example #9
def spread_observation_model(state, radius=1):
    # given a hidden state, return the Distribution for its observation
    x, y, action = state
    observed_states = Distribution()

    for x_new in range(x - radius, x + radius + 1):
        for y_new in range(y - radius, y + radius + 1):
            if x_new >= 0 and x_new <= GRID_WIDTH - 1 and \
               y_new >= 0 and y_new <= GRID_HEIGHT - 1:
                observed_states[(x_new, y_new)] = 1.

    observed_states.renormalize()
    return observed_states
Example #10
def spread_observation_model(state, radius=1):
    # given a hidden state, return the Distribution for its observation
    x, y, action    = state
    observed_states = Distribution()

    for x_new in range(x - radius, x + radius + 1):
        for y_new in range(y - radius, y + radius + 1):
            if x_new >= 0 and x_new <= GRID_WIDTH - 1 and \
               y_new >= 0 and y_new <= GRID_HEIGHT - 1:
                observed_states[(x_new, y_new)] = 1.

    observed_states.renormalize()
    return observed_states
Example #11
    def get_transition_mus_and_probs(self, mu, a):
        """Gets information about possible transitions for the action.

        This is the equivalent of self.mdp.get_transition_states_and_probs() for
        generalized states. So, it returns a list of (next_mu, prob) pairs,
        where next_mu must be a generalized state.
        """
        s = self.extract_state_from_mu(mu)
        base_result = self.mdp.get_transition_states_and_probs(s, a)
        most_likely_state, _ = max(base_result, key=lambda tup: tup[1])
        dist = Distribution(dict(base_result))
        dist.factor(most_likely_state, self.calibration_factor)
        return list(dist.get_dict().items())
Example #12
def discretized_gaussian_observation_model(state, sigma=1):
    # given a hidden state, return the Distribution for its observation
    x, y, action = state
    observed_states = Distribution()

    # x_new, y_new = np.meshgrid(range(GRID_WIDTH), range(GRID_HEIGHT))
    # values = np.exp( -( (x_new - x)**2 + (y_new -y)**2 )/(2.*sigma) )

    for x_new in range(GRID_WIDTH):
        for y_new in range(GRID_HEIGHT):
            observed_states[(x_new, y_new)] = \
                np.exp(-( (x_new - x)**2 + (y_new - y)**2 )/(2.*sigma))

    observed_states.renormalize()
    return observed_states
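For intuition, each cell's unnormalized weight depends only on its squared distance from the true position, and as written the sigma parameter plays the role of the variance rather than the standard deviation. A quick standalone check of the fall-off for sigma = 1:

import numpy as np

# weight of a cell at Euclidean distance d from the true position (sigma = 1)
for d in range(4):
    print(d, np.exp(-d**2 / (2. * 1)))  # 1.0, 0.61, 0.14, 0.011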
Example #13
def discretized_gaussian_observation_model(state, sigma=1):
    # given a hidden state, return the Distribution for its observation
    x, y, action    = state
    observed_states = Distribution()

    # x_new, y_new = np.meshgrid(range(GRID_WIDTH), range(GRID_HEIGHT))
    # values = np.exp( -( (x_new - x)**2 + (y_new -y)**2 )/(2.*sigma) )

    for x_new in range(GRID_WIDTH):
        for y_new in range(GRID_HEIGHT):
            observed_states[(x_new, y_new)] = \
                np.exp(-( (x_new - x)**2 + (y_new - y)**2 )/(2.*sigma))

    observed_states.renormalize()
    return observed_states
Example #14
def initial_distribution():
    # returns a Distribution for the initial hidden state
    prior = Distribution()
    for x in range(GRID_WIDTH):
        for y in range(GRID_HEIGHT):
            prior[(x, y, 'stay')] = 1. / (GRID_WIDTH * GRID_HEIGHT)
    return prior
Example #15
def action(state):
    # Walls are invalid states and the MDP will refuse to give an action for
    # them. However, the VIN's architecture requires it to provide an action
    # distribution for walls too, so hardcode it to always be STAY.
    x, y = state
    if mdp.walls[y][x]:
        return dist_to_numpy(Distribution({Direction.STAY: 1}))
    return dist_to_numpy(agent.get_action_distribution(state))
Example #16
    def get_action_distribution(self, s):
        """Returns a Distribution over actions.

        Note that this is a normal state s, not a generalized state mu.
        """
        mu = self.extend_state_to_mu(s)
        actions = self.mdp.get_actions(s)
        if self.beta is not None:
            q_vals = np.array([self.qvalue(mu, a) for a in actions])
            q_vals = q_vals - np.mean(q_vals)  # To prevent overflow in exp
            action_dist = np.exp(self.beta * q_vals)
            return Distribution(dict(zip(actions, action_dist)))

        best_value, best_actions = float("-inf"), []
        for a in actions:
            action_value = self.qvalue(mu, a)
            if action_value > best_value:
                best_value, best_actions = action_value, [a]
            elif action_value == best_value:
                best_actions.append(a)
        return Distribution({a: 1 for a in best_actions})
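When beta is set, this is Boltzmann (softmax) action selection: weights proportional to exp(beta * Q). Subtracting the mean Q-value rescales every weight by the same constant, so the normalized distribution is unchanged while the exponentials stay in a safe range. A quick standalone check of that invariance:

import numpy as np

# shifting the q-values changes nothing once the weights are normalized,
# but it keeps np.exp() away from overflow for large q-values
beta, q_vals = 1.0, np.array([1000.0, 1001.0, 1002.0])
weights = np.exp(beta * (q_vals - np.mean(q_vals)))
print(weights / weights.sum())  # approx. [0.09, 0.245, 0.665]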
Example #17
def chill(show_all=False):
    if show_all:
        ignored_packages = ()
    else:
        ignored_packages = {
            'pip', 'pip-chill', 'wheel', 'setuptools', 'pkg-resources'
        }

    # Gather all packages that are requirements and will be auto-installed.
    distributions = {}
    dependencies = {}

    for distribution in pip.get_installed_distributions():
        if distribution.key in ignored_packages:
            continue

        if distribution.key in dependencies:
            dependencies[distribution.key].version = distribution.version
        else:
            distributions[distribution.key] = \
                Distribution(distribution.key, distribution.version)

        for requirement in distribution.requires():
            if requirement.key not in ignored_packages:
                if requirement.key in dependencies:
                    dependencies[requirement.key] \
                        .required_by.add(distribution.key)
                else:
                    dependencies[requirement.key] = Distribution(
                        requirement.key, required_by=(distribution.key, ))

            if requirement.key in distributions:
                dependencies[requirement.key].version \
                    = distributions.pop(requirement.key).version

    return sorted(distributions.values()), sorted(dependencies.values())
Example #18
def compute_marginal(particles, weights):
    """
    Essentially computes an *empirical* distribution given particles and
    weights

    Inputs
    ------
    particles: a list where each element is a hidden state value

    weights: a list where element i is the weight for particle i

    Output
    ------
    A Distribution, where each hidden state has probability proportional to
    the total weight for that hidden state (which may be from multiple
    particles)
    """
    marginal = Distribution()

    # TODO: Your code here
    raise NotImplementedError

    return marginal
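The body is left as an exercise in the original source. Purely as an illustration of the docstring, a hypothetical completion could total the weight per hidden state and normalize, assuming the dict-like Distribution used in the other examples:

def compute_marginal_sketch(particles, weights):
    # hypothetical completion: total weight per hidden state, normalized
    marginal = Distribution()
    for particle, weight in zip(particles, weights):
        marginal[particle] += weight
    marginal.renormalize()
    return marginal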
Example #19
    def test_equality(self):
        self.assertEqual(Distribution({
            'a': 0.5,
            'b': 0.5
        }), Distribution({
            'a': 1,
            'b': 1
        }))
        self.assertEqual(Distribution({
            'a': 0.5,
            'b': 0.5,
            'c': 0
        }), Distribution({
            'a': 0.5,
            'b': 0.5,
            'd': 0
        }))
        self.assertNotEqual(Distribution({
            'a': 1,
            'b': 1
        }), Distribution({'a': 1}))
Example #20
def forward_backward(
    all_possible_hidden_states,
    all_possible_observed_states,
    prior_distribution,
    transition_model,
    observation_model,
    observations,
):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    all_possible_observed_states: a list of possible observed states

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    A list of marginal distributions at each time step; each distribution
    should be encoded as a Distribution (see the Distribution class in
    robot.py and how it is used in both robot.py and the function
    generate_data() above). The i-th Distribution should correspond to
    time step i.
    """

    num_time_steps = len(observations)

    # -------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.0
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        assert len(potential.keys()) > 0, (
            "Invalid observation at time %d. Maybe you forgot the "
            "--use-spread-output argument?" % n
        )
        phis.append(potential)

    # we need not recompute edge potentials since they're given by the
    # transition model: phi(x_i, x_j) = transition_model[x_i](x_j),
    # where j = i+1

    # -------------------------------------------------------------------------
    # Forward pass
    #
    forward_messages = []

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.0
    message.renormalize()
    forward_messages.append(message)

    for n in range(num_time_steps - 1):
        # compute message from node n to node n+1
        message = Distribution()

        ## the commented block below is easier to understand but is slow;
        ## a faster version is below that switches the order of the for loops
        ## and reduces the number of states that we iterate over

        # for next_hidden_state in all_possible_hidden_states:
        #    value = 0.
        #    # only loop over hidden states with nonzero singleton potential!
        #    for hidden_state in phis[n]:
        #        value += phis[n][hidden_state] * \
        #                 transition_model(hidden_state)[next_hidden_state] * \
        #                 forward_messages[-1][hidden_state]
        #    if value > 0: # only store entries with nonzero prob.
        #        message[next_hidden_state] = value

        ## faster version of the commented block above
        # 1. only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            # 2. only loop over possible next hidden states given current
            #    hidden state
            for next_hidden_state in transition_model(hidden_state):
                factor = (
                    phis[n][hidden_state]
                    * transition_model(hidden_state)[next_hidden_state]
                    * forward_messages[-1][hidden_state]
                )
                if factor > 0:  # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

        message.renormalize()
        forward_messages.append(message)

    # -------------------------------------------------------------------------
    # Pre-processing to speed up the backward pass: cache for each hidden
    # state what the possible previous hidden states are
    #
    possible_prev_hidden_states = {}
    for hidden_state in all_possible_hidden_states:
        for next_hidden_state in transition_model(hidden_state):
            if next_hidden_state in possible_prev_hidden_states:
                possible_prev_hidden_states[next_hidden_state].add(hidden_state)
            else:
                possible_prev_hidden_states[next_hidden_state] = set([hidden_state])

    # -------------------------------------------------------------------------
    # Backward pass
    #
    backward_messages = []

    # compute message from non-existent node <num_time_steps> to node
    # <num_time_steps>-1
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.0
    message.renormalize()
    backward_messages.append(message)

    for n in range(num_time_steps - 2, -1, -1):
        # compute message from node n+1 to n
        message = Distribution()

        ## again, I've commented out a block that's easier to understand but
        ## slow; the faster version is below

        # for hidden_state in all_possible_hidden_states:
        #    value = 0.
        #    for next_hidden_state in transition_model(hidden_state):
        #        value += phis[n+1][next_hidden_state] * \
        #                 transition_model(hidden_state)[next_hidden_state] * \
        #                 backward_messages[0][next_hidden_state]
        #    if value > 0: # only store entries with nonzero prob.
        #        message[hidden_state] = value

        ## faster version
        # 1. only loop over next hidden states with nonzero potential!
        for next_hidden_state in phis[n + 1]:
            # 2. only loop over possible previous hidden states
            for hidden_state in possible_prev_hidden_states[next_hidden_state]:
                factor = (
                    phis[n + 1][next_hidden_state]
                    * transition_model(hidden_state)[next_hidden_state]
                    * backward_messages[0][next_hidden_state]
                )
                if factor > 0:  # only store entries with nonzero prob.
                    if hidden_state in message:
                        message[hidden_state] += factor
                    else:
                        message[hidden_state] = factor

        message.renormalize()
        backward_messages.insert(0, message)

    # -------------------------------------------------------------------------
    # Compute marginals
    #
    marginals = []
    for n in range(num_time_steps):
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if (hidden_state in forward_messages[n]
                    and hidden_state in backward_messages[n]
                    and hidden_state in phis[n]):
                value = (forward_messages[n][hidden_state]
                         * backward_messages[n][hidden_state]
                         * phis[n][hidden_state])
                if value > 0:  # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        marginals.append(marginal)

    # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
    ### YOUR CODE HERE: Estimate marginals & pairwise marginals
    pairwise_marginals = [None] * (num_time_steps - 1)

    # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    return (marginals, pairwise_marginals)
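The pairwise marginals are left as an exercise (the placeholder above). For reference only, a hedged sketch of one standard estimate built from quantities this function already computes: the edge marginal over (x_n, x_n+1) is proportional to forward_messages[n][x_n] * phis[n][x_n] * transition_model(x_n)[x_n+1] * phis[n+1][x_n+1] * backward_messages[n+1][x_n+1]. The helper below is hypothetical, not the author's solution.

def estimate_pairwise_marginals(phis, forward_messages, backward_messages,
                                transition_model, num_time_steps):
    # hypothetical helper: one Distribution per edge, keyed by
    # (hidden_state, next_hidden_state) pairs
    pairwise_marginals = []
    for n in range(num_time_steps - 1):
        pairwise = Distribution()
        for hidden_state in phis[n]:
            left = forward_messages[n][hidden_state] * phis[n][hidden_state]
            if left == 0:
                continue
            for next_hidden_state in transition_model(hidden_state):
                value = (left
                         * transition_model(hidden_state)[next_hidden_state]
                         * phis[n + 1][next_hidden_state]
                         * backward_messages[n + 1][next_hidden_state])
                if value > 0:  # only store entries with nonzero prob.
                    pairwise[(hidden_state, next_hidden_state)] = value
        pairwise.renormalize()
        pairwise_marginals.append(pairwise)
    return pairwise_marginals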
Example #21
def forward(all_possible_hidden_states,
            prior_distribution,
            transition_model,
            observation_model,
            observations):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    This function is a Python generator! It calculates outputs "on demand";
    the i-th output should be the marginal distribution for time step i.
    """
    num_time_steps = len(observations)

    #-------------------------------------------------------------------------
    # Forward pass
    #
    def compute_marginal(message, node_potential):
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if hidden_state in message and \
               hidden_state in node_potential:
                value = message[hidden_state] * node_potential[hidden_state]
                if value > 0: # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        return marginal

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()

    # compute node potential for time step 0
    node_potential = Distribution()
    observed_state = observations[0]
    for hidden_state in prior_distribution:
        value = prior_distribution[hidden_state]
        if observed_state is not None:
            value *= observation_model(hidden_state)[observed_state]
        if value > 0:
            node_potential[hidden_state] = value

    yield compute_marginal(message, node_potential)
    prev_message        = message
    prev_node_potential = node_potential

    for n in range(1, num_time_steps):
        message = Distribution()

        node_potential = Distribution()
        observed_state = observations[n]

        # compute message from node n-1 to n and fill in node potential for
        # time step n
        for hidden_state in all_possible_hidden_states:
            # only loop over possible next hidden states given current
            # hidden state
            for next_hidden_state in transition_model(hidden_state):
                factor = prev_node_potential[hidden_state] * \
                         transition_model(hidden_state)[next_hidden_state] * \
                         prev_message[hidden_state]
                if factor > 0: # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

            if observed_state is not None:
                value = observation_model(hidden_state)[observed_state]
                if value > 0:
                    node_potential[hidden_state] = value
            else:
                node_potential[hidden_state] = 1.

        message.renormalize()
        yield compute_marginal(message, node_potential)
        prev_message        = message
        prev_node_potential = node_potential
Example #22
    def test_as_numpy_array(self):
        dist = Distribution({'a': 1, 'b': 2, 'd': 2})
        fn = lambda c: ord(c) - ord('a')
        self.almostEqual(list(dist.as_numpy_array(fn)), [0.2, 0.4, 0, 0.4])
        self.almostEqual(list(dist.as_numpy_array(fn, 5)),
                         [0.2, 0.4, 0, 0.4, 0])
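The expected arrays above pin down the behaviour of as_numpy_array: normalized weights placed at index fn(key), padded with zeros up to an optional length. A sketch consistent with the test; the signature and defaults are assumptions, not the project's actual code:

import numpy as np

def as_numpy_array(self, fn=lambda key: key, length=None):
    # sketch of a Distribution method: normalized weight at index fn(key)
    indices = {key: fn(key) for key in self}
    if length is None:
        length = max(indices.values()) + 1
    result = np.zeros(length)
    total = sum(self.values())
    for key, idx in indices.items():
        result[idx] = self[key] / total
    return result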
Example #23
    def test_sample(self):
        dist = Distribution({'a': 1, 'b': 1})
        samples = [dist.sample() for _ in range(200)]
        self.assertTrue(samples.count('a') > 10)
        self.assertTrue(samples.count('b') > 10)
Example #24
def Viterbi(
    all_possible_hidden_states,
    all_possible_observed_states,
    prior_distribution,
    transition_model,
    observation_model,
    observations,
):
    """
    Inputs
    ------
    See the list of inputs for the function forward_backward() above.

    Output
    ------
    A list of estimated hidden states, each encoded as a tuple
    (<x>, <y>, <action>)
    """

    num_time_steps = len(observations)

    # Below is an implementation of the Min-Sum algorithm presented in class
    # specialized to the HMM case

    messages = []  # best values so far
    back_pointers = []  # back-pointers for best values so far

    # -------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.0
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        assert len(potential.keys()) > 0, (
            "Invalid observation at time %d. Maybe you forgot the "
            "--use-spread-output argument?" % n
        )
        phis.append(potential)

    # -------------------------------------------------------------------------
    # Forward pass
    #

    # handle initial time step differently
    initial_message = {}
    for hidden_state in prior_distribution:
        value = -careful_log(phis[0][hidden_state])
        if value < float("inf"):  # only store entries with nonzero prob.
            initial_message[hidden_state] = value
    messages.append(initial_message)

    # rest of the time steps
    for n in range(1, num_time_steps):
        prev_message = messages[-1]
        new_message = {}
        new_back_pointer = {}

        # only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            values = []
            for prev_hidden_state in prev_message:
                value = (
                    prev_message[prev_hidden_state]
                    - careful_log(transition_model(prev_hidden_state)[hidden_state])
                    - careful_log(phis[n][hidden_state])
                )
                if value < float("inf"):
                    # only store entries with nonzero prob.
                    values.append((prev_hidden_state, value))

            if len(values) > 0:
                best_prev_hidden_state, best_value = min(values, key=lambda x: x[1])
                new_message[hidden_state] = best_value
                new_back_pointer[hidden_state] = best_prev_hidden_state

        messages.append(new_message)
        back_pointers.append(new_back_pointer)

    # -------------------------------------------------------------------------
    # Backward pass (follow back-pointers)
    #
    estimated_hidden_states = []

    # handle last time step differently
    last_message = messages[-1]
    minimum = np.inf
    arg_min = None
    for hidden_state in last_message:
        if last_message[hidden_state] < minimum:
            minimum = last_message[hidden_state]
            arg_min = hidden_state
    estimated_hidden_states.append(arg_min)

    # rest of the time steps
    for n in range(num_time_steps - 2, -1, -1):
        next_back_pointers = back_pointers[n]
        best_hidden_state = next_back_pointers[estimated_hidden_states[0]]
        estimated_hidden_states.insert(0, best_hidden_state)

    return estimated_hidden_states
Example #25
def forward(all_possible_hidden_states, prior_distribution, transition_model,
            observation_model, observations):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    This function is a Python generator! It calculates outputs "on demand";
    the i-th output should be the marginal distribution for time step i.
    """
    num_time_steps = len(observations)

    #-------------------------------------------------------------------------
    # Forward pass
    #
    def compute_marginal(message, node_potential):
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if hidden_state in message and \
               hidden_state in node_potential:
                value = message[hidden_state] * node_potential[hidden_state]
                if value > 0:  # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        return marginal

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()

    # compute node potential for time step 0
    node_potential = Distribution()
    observed_state = observations[0]
    for hidden_state in prior_distribution:
        value = prior_distribution[hidden_state]
        if observed_state is not None:
            value *= observation_model(hidden_state)[observed_state]
        if value > 0:
            node_potential[hidden_state] = value

    yield compute_marginal(message, node_potential)
    prev_message = message
    prev_node_potential = node_potential

    for n in range(1, num_time_steps):
        message = Distribution()

        node_potential = Distribution()
        observed_state = observations[n]

        # compute message from node n-1 to n and fill in node potential for
        # time step n
        for hidden_state in all_possible_hidden_states:
            # only loop over possible next hidden states given current
            # hidden state
            for next_hidden_state in transition_model(hidden_state):
                factor = prev_node_potential[hidden_state] * \
                         transition_model(hidden_state)[next_hidden_state] * \
                         prev_message[hidden_state]
                if factor > 0:  # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

            if observed_state is not None:
                value = observation_model(hidden_state)[observed_state]
                if value > 0:
                    node_potential[hidden_state] = value
            else:
                node_potential[hidden_state] = 1.

        message.renormalize()
        yield compute_marginal(message, node_potential)
        prev_message = message
        prev_node_potential = node_potential
Example #26
def second_best(all_possible_hidden_states, all_possible_observed_states,
                prior_distribution, transition_model, observation_model,
                observations):
    """
    Inputs
    ------
    See the list of inputs for the function forward_backward() above.

    Output
    ------
    A list of estimated hidden states, each encoded as a tuple
    (<x>, <y>, <action>)
    """

    num_time_steps = len(observations)

    # Basically for each (possible) hidden state at time step i, we need to
    # keep track of the best previous hidden state AND the second best
    # previous hidden state--where we need to keep track of TWO back pointers
    # per (possible) hidden state at each time step!

    messages = []  # best values so far
    messages2 = []  # second-best values so far
    back_pointers = []  # per time step per hidden state, we now need
    # *two* back-pointers

    #-------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        phis.append(potential)

    #-------------------------------------------------------------------------
    # Forward pass
    #

    # handle initial time step differently
    initial_message = {}
    for hidden_state in prior_distribution:
        value = -careful_log(phis[0][hidden_state])
        if value < float('inf'):  # only store entries with nonzero prob.
            initial_message[hidden_state] = value
    messages.append(initial_message)
    initial_message2 = {}  # there is no second-best option
    messages2.append(initial_message2)

    # rest of the time steps
    for n in range(1, num_time_steps):
        prev_message = messages[-1]
        prev_message2 = messages2[-1]
        new_message = {}
        new_message2 = {}
        new_back_pointers = {}  # need to store 2 per possible hidden state

        for hidden_state in phis[n]:
            # only look at possible hidden states given observation

            values = []
            # each entry in values will be a tuple of the form:
            # (<value>, <previous hidden state>,
            #  <which back pointer we followed>),
            # where <which back pointer we followed> is 0 (best back pointer)
            # or 1 (second-best back pointer)

            # iterate through best previous values
            for prev_hidden_state in prev_message:
                value = prev_message[prev_hidden_state] - \
                        careful_log(transition_model(prev_hidden_state)[ \
                                        hidden_state]) - \
                        careful_log(phis[n][hidden_state])
                if value < float('inf'):
                    # only store entries with nonzero prob.
                    values.append((value, prev_hidden_state, 0))

            # also iterate through second-best previous values
            for prev_hidden_state in prev_message2:
                value = prev_message2[prev_hidden_state] - \
                        careful_log(transition_model(prev_hidden_state)[ \
                                        hidden_state]) - \
                        careful_log(phis[n][hidden_state])
                if value < float('inf'):
                    # only store entries with nonzero prob.
                    values.append((value, prev_hidden_state, 1))

            if len(values) > 0:
                # this part could actually be sped up by not using a sorting
                # algorithm...
                sorted_values = sorted(values, key=lambda x: x[0])
                best_value, best_prev_hidden_state, which_back_pointer = \
                    sorted_values[0]

                # for the best value, the back pointer should *always* be 0,
                # meaning that we follow the best back pointer and not the
                # second best

                if len(values) > 1:
                    best_value2, best_prev_hidden_state2, which_back_pointer2\
                        = sorted_values[1]
                else:
                    best_value2 = float('inf')
                    best_prev_hidden_state2 = None
                    which_back_pointer2 = None

                new_message[hidden_state] = best_value
                new_message2[hidden_state] = best_value2
                new_back_pointers[hidden_state] = \
                    ( (best_prev_hidden_state, which_back_pointer),
                      (best_prev_hidden_state2, which_back_pointer2) )

        messages.append(new_message)
        messages2.append(new_message2)
        back_pointers.append(new_back_pointers)

    #-------------------------------------------------------------------------
    # Backward pass (follow back-pointers)
    #
    estimated_hidden_states = []

    # handle last time step differently
    values = []
    for hidden_state, value in messages[-1].items():
        values.append((value, hidden_state, 0))
    for hidden_state, value in messages2[-1].items():
        values.append((value, hidden_state, 1))

    if len(values) > 1:
        # this part could actually be sped up by not using a sorting
        # algorithm...
        sorted_values = sorted(values, key=lambda x: x[0])
        second_best_value, hidden_state, which_back_pointer = sorted_values[1]

        estimated_hidden_states.append(hidden_state)

        # rest of the time steps
        for n in range(num_time_steps - 2, -1, -1):
            next_back_pointers = back_pointers[n]
            hidden_state, which_back_pointer = \
                next_back_pointers[hidden_state][which_back_pointer]
            estimated_hidden_states.insert(0, hidden_state)
    else:
        # this happens if there isn't a second best option, which should mean
        # that the only possible option (the MAP estimate) is the only
        # solution with 0 error
        estimated_hidden_states = [None] * num_time_steps

    return estimated_hidden_states
Example #27
def forward_backward(all_possible_hidden_states, all_possible_observed_states,
                     prior_distribution, transition_model, observation_model,
                     observations):
    """
    Inputs
    ------
    all_possible_hidden_states: a list of possible hidden states

    all_possible_observed_states: a list of possible observed states

    prior_distribution: a distribution over states

    transition_model: a function that takes a hidden state and returns a
        Distribution for the next state

    observation_model: a function that takes a hidden state and returns a
        Distribution for the observation from that hidden state

    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    A list of marginal distributions at each time step; each distribution
    should be encoded as a Distribution (see the Distribution class in
    robot.py and how it is used in both robot.py and the function
    generate_data() above). The i-th Distribution should correspond to
    time step i.
    """

    num_time_steps = len(observations)

    #-------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        assert len(potential.keys()) > 0, \
            "Invalid observation at time %d. Maybe you forgot the " \
            "--use-spread-output argument?" % n
        phis.append(potential)

    # we need not recompute edge potentials since they're given by the
    # transition model: phi(x_i, x_j) = transition_model[x_i](x_j),
    # where j = i+1

    #-------------------------------------------------------------------------
    # Forward pass
    #
    forward_messages = []

    # compute message from non-existent node -1 to node 0
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()
    forward_messages.append(message)

    for n in range(num_time_steps - 1):
        # compute message from node n to node n+1
        message = Distribution()

        ## the commented block below is easier to understand but is slow;
        ## a faster version is below that switches the order of the for loops
        ## and reduces the number of states that we iterate over

        #for next_hidden_state in all_possible_hidden_states:
        #    value = 0.
        #    # only loop over hidden states with nonzero singleton potential!
        #    for hidden_state in phis[n]:
        #        value += phis[n][hidden_state] * \
        #                 transition_model(hidden_state)[next_hidden_state] * \
        #                 forward_messages[-1][hidden_state]
        #    if value > 0: # only store entries with nonzero prob.
        #        message[next_hidden_state] = value

        ## faster version of the commented block above
        # 1. only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            # 2. only loop over possible next hidden states given current
            #    hidden state
            for next_hidden_state in transition_model(hidden_state):
                factor = phis[n][hidden_state] * \
                         transition_model(hidden_state)[next_hidden_state] * \
                         forward_messages[-1][hidden_state]
                if factor > 0:  # only store entries with nonzero prob.
                    if next_hidden_state in message:
                        message[next_hidden_state] += factor
                    else:
                        message[next_hidden_state] = factor

        message.renormalize()
        forward_messages.append(message)

    #-------------------------------------------------------------------------
    # Pre-processing to speed up the backward pass: cache for each hidden
    # state what the possible previous hidden states are
    #
    possible_prev_hidden_states = {}
    for hidden_state in all_possible_hidden_states:
        for next_hidden_state in transition_model(hidden_state):
            if next_hidden_state in possible_prev_hidden_states:
                possible_prev_hidden_states[next_hidden_state].add( \
                    hidden_state)
            else:
                possible_prev_hidden_states[next_hidden_state] = \
                    set([hidden_state])

    #-------------------------------------------------------------------------
    # Backward pass
    #
    backward_messages = []

    # compute message from non-existent node <num_time_steps> to node
    # <num_time_steps>-1
    message = Distribution()
    for hidden_state in all_possible_hidden_states:
        message[hidden_state] = 1.
    message.renormalize()
    backward_messages.append(message)

    for n in range(num_time_steps - 2, -1, -1):
        # compute message from node n+1 to n
        message = Distribution()

        ## again, I've commented out a block that's easier to understand but
        ## slow; the faster version is below

        #for hidden_state in all_possible_hidden_states:
        #    value = 0.
        #    for next_hidden_state in transition_model(hidden_state):
        #        value += phis[n+1][next_hidden_state] * \
        #                 transition_model(hidden_state)[next_hidden_state] * \
        #                 backward_messages[0][next_hidden_state]
        #    if value > 0: # only store entries with nonzero prob.
        #        message[hidden_state] = value

        ## faster version
        # 1. only loop over next hidden states with nonzero potential!
        for next_hidden_state in phis[n + 1]:
            # 2. only loop over possible previous hidden states
            for hidden_state in possible_prev_hidden_states[next_hidden_state]:
                factor = phis[n+1][next_hidden_state] * \
                         transition_model(hidden_state)[next_hidden_state] * \
                         backward_messages[0][next_hidden_state]
                if factor > 0:  # only store entries with nonzero prob.
                    if hidden_state in message:
                        message[hidden_state] += factor
                    else:
                        message[hidden_state] = factor

        message.renormalize()
        backward_messages.insert(0, message)

    #-------------------------------------------------------------------------
    # Compute marginals
    #
    marginals = []
    for n in range(num_time_steps):
        marginal = Distribution()
        for hidden_state in all_possible_hidden_states:
            if hidden_state in forward_messages[n] and \
               hidden_state in backward_messages[n] and \
               hidden_state in phis[n]:
                value = forward_messages[n][hidden_state] * \
                        backward_messages[n][hidden_state] * \
                        phis[n][hidden_state]
                if value > 0:  # only store entries with nonzero prob.
                    marginal[hidden_state] = value
        marginal.renormalize()
        marginals.append(marginal)

    # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
    ### YOUR CODE HERE: Estimate marginals & pairwise marginals
    pairwise_marginals = [None] * (num_time_steps - 1)

    # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    return (marginals, pairwise_marginals)
Example #28
    def optimal_agent_test(self, agent):
        grid = [
            'XXXXXXXXX', 'X9X6XA  X', 'X X X XXX', 'X      2X', 'XXXXXXXXX'
        ]
        n, s, e, w, stay = self.all_actions

        mdp = GridworldMdp(grid, living_reward=-0.1)
        env = Mdp(mdp)
        agent.set_mdp(mdp)
        start_state = mdp.get_start_state()

        # Action distribution
        action_dist = agent.get_action_distribution(start_state)
        self.assertEqual(action_dist, Distribution({s: 1}))

        # Trajectory
        actions, _ = self.run_on_env(agent, env, gamma=0.95, episode_length=10)
        self.assertEqual(actions, [s, s, w, w, w, w, n, n, stay, stay])

        # Same thing, but with a bigger discount
        mdp = GridworldMdp(grid, living_reward=-0.001)
        env = Mdp(mdp)
        agent = agents.OptimalAgent(gamma=0.5, num_iters=20)
        agent.set_mdp(mdp)
        start_state = mdp.get_start_state()

        # Values
        # Inaccurate because I ignore living reward and we only use 20
        # iterations of value iteration, so only check to 2 places
        self.assertAlmostEqual(agent.value(start_state), 0.25, places=2)

        # Action distribution
        action_dist = agent.get_action_distribution(start_state)
        self.assertEqual(action_dist, Distribution({s: 1}))

        # Trajectory
        actions, reward = self.run_on_env(agent,
                                          env,
                                          gamma=0.5,
                                          episode_length=10)
        # Again approximate comparison since we don't consider living rewards
        self.assertAlmostEqual(reward, (4 - 0.0625) / 16, places=2)
        self.assertEqual(actions,
                         [s, s, e, e, stay, stay, stay, stay, stay, stay])

        # Same thing, but with Boltzmann rationality
        agent = agents.OptimalAgent(beta=1, gamma=0.5, num_iters=20)
        agent.set_mdp(mdp)

        # Action distribution
        dist = agent.get_action_distribution(start_state).get_dict()
        nprob, sprob, eprob, wprob = dist[n], dist[s], dist[e], dist[w]
        for p in [nprob, sprob, eprob, wprob]:
            self.assertTrue(0 < p < 1)
        self.assertEqual(nprob, wprob)
        self.assertTrue(sprob > nprob)
        self.assertTrue(nprob > eprob)

        middle_state = (2, 3)
        dist = agent.get_action_distribution(middle_state).get_dict()
        nprob, sprob, eprob, wprob = dist[n], dist[s], dist[e], dist[w]
        for p in [nprob, sprob, eprob, wprob]:
            self.assertTrue(0 < p < 1)
        self.assertEqual(nprob, sprob)
        self.assertTrue(wprob > eprob)
        self.assertTrue(eprob > nprob)
Example #29
def Viterbi(all_possible_hidden_states, all_possible_observed_states,
            prior_distribution, transition_model, observation_model,
            observations):
    """
    Inputs
    ------
    See the list of inputs for the function forward_backward() above.

    Output
    ------
    A list of estimated hidden states, each encoded as a tuple
    (<x>, <y>, <action>)
    """

    num_time_steps = len(observations)

    # Below is an implementation of the Min-Sum algorithm presented in class
    # specialized to the HMM case

    messages = []  # best values so far
    back_pointers = []  # back-pointers for best values so far

    #-------------------------------------------------------------------------
    # Fold observations into singleton potentials
    #
    phis = []  # phis[n] is the singleton potential for node n
    for n in range(num_time_steps):
        potential = Distribution()
        observed_state = observations[n]
        if n == 0:
            for hidden_state in prior_distribution:
                value = prior_distribution[hidden_state]
                if observed_state is not None:
                    value *= observation_model(hidden_state)[observed_state]
                if value > 0:  # only store entries with nonzero prob.
                    potential[hidden_state] = value
        else:
            for hidden_state in all_possible_hidden_states:
                if observed_state is None:
                    # singleton potential should be identically 1
                    potential[hidden_state] = 1.
                else:
                    value = observation_model(hidden_state)[observed_state]
                    if value > 0:  # only store entries with nonzero prob.
                        potential[hidden_state] = value
        assert len(potential.keys()) > 0, \
            "Invalid observation at time %d. Maybe you forgot the " \
            "--use-spread-output argument?" % n
        phis.append(potential)

    #-------------------------------------------------------------------------
    # Forward pass
    #

    # handle initial time step differently
    initial_message = {}
    for hidden_state in prior_distribution:
        value = -careful_log(phis[0][hidden_state])
        if value < float('inf'):  # only store entries with nonzero prob.
            initial_message[hidden_state] = value
    messages.append(initial_message)

    # rest of the time steps
    for n in range(1, num_time_steps):
        prev_message = messages[-1]
        new_message = {}
        new_back_pointer = {}

        # only loop over hidden states with nonzero singleton potential!
        for hidden_state in phis[n]:
            values = []
            for prev_hidden_state in prev_message:
                value = prev_message[prev_hidden_state] \
                        - careful_log(transition_model(prev_hidden_state)[ \
                                          hidden_state]) \
                        - careful_log(phis[n][hidden_state])
                if value < float('inf'):
                    # only store entries with nonzero prob.
                    values.append((prev_hidden_state, value))

            if len(values) > 0:
                best_prev_hidden_state, best_value = \
                    min(values, key=lambda x: x[1])
                new_message[hidden_state] = best_value
                new_back_pointer[hidden_state] = best_prev_hidden_state

        messages.append(new_message)
        back_pointers.append(new_back_pointer)

    #-------------------------------------------------------------------------
    # Backward pass (follow back-pointers)
    #
    estimated_hidden_states = []

    # handle last time step differently
    last_message = messages[-1]
    minimum = np.inf
    arg_min = None
    for hidden_state in last_message:
        if last_message[hidden_state] < minimum:
            minimum = last_message[hidden_state]
            arg_min = hidden_state
    estimated_hidden_states.append(arg_min)

    # rest of the time steps
    for n in range(num_time_steps - 2, -1, -1):
        next_back_pointers = back_pointers[n]
        best_hidden_state = next_back_pointers[estimated_hidden_states[0]]
        estimated_hidden_states.insert(0, best_hidden_state)

    return estimated_hidden_states