Code example #1
def test_udm(mdp, abstr_type, num_episodes, error_dict=None, episode_buffer=0):
    """
    Test how UDM performs on the given MDP
    """
    # Make abstract MDP
    abstr_mdp = mdp.make_abstr_mdp(abstr_type)
    # Apply corruption if argument is provided
    if error_dict:
        c_s_a = make_corruption(abstr_mdp, reassignment_dict=error_dict)
        abstr_mdp = AbstractMDP(mdp, c_s_a)
    agent = UDMAgent(mdp,
                     s_a=abstr_mdp.state_abstr,
                     transition_threshold=TRANSITION_THRESHOLD,
                     episode_buffer=episode_buffer)
    # Print abstraction
    print('State abstraction is')
    print(agent.get_abstraction_as_string())

    # Explore for the given number of episodes
    while agent.episode_count < num_episodes:
        agent.explore()
        # Report the step count whenever an episode has just ended
        if agent.episode_count > 0 and agent.end_of_episode_record[-1] == 1:
            print('On episode', agent.episode_count, 'step count was',
                  agent.step_count_record[agent.episode_count - 1])

    # Calculate state splits based on UDM algorithm
    transition_records = agent.get_all_transition_returns()
    split_record = agent.test_all_states_for_split(transition_records)

    abstr_states_seen = []
    for ground_state in agent.mdp.get_all_possible_states():
        abstr_state = agent.get_abstr_from_ground(ground_state)

        if abstr_state not in abstr_states_seen:
            print('Overlapping states for', abstr_state, end=' ')
            ground_states = agent.get_ground_states_from_abstract_state(
                abstr_state)
            for ground in ground_states:
                print(ground, end=' ')
            print()
            agent.print_action_to_state_list(
                agent.test_state_for_split(transition_records[abstr_state]))
            abstr_states_seen.append(abstr_state)
    print("About to print")
    #count_number_of_splits(split_record)
    print(split_record)
    memory_record = split_record_to_additional_states(split_record)
    print_memory_record(memory_record)

    if error_dict:
        for key, value in error_dict.items():
            print('Error state', key, 'mapped to', value, 'Abstr state',
                  abstr_mdp.get_abstr_from_ground(value))
            print('States in corr abstr state are', end=' ')
            group = abstr_mdp.get_ground_from_abstr(
                abstr_mdp.get_abstr_from_ground(value))
            for state in group:
                print(state, end=' ')
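
A minimal usage sketch (GridWorldMDP and Abstr_type are the names imported in code example #5; the episode count is illustrative):

# Sketch: run UDM on an uncorrupted Q*-abstraction of a grid world
mdp = GridWorldMDP()
test_udm(mdp, Abstr_type.Q_STAR, num_episodes=500)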
Code example #2
	def make_abstraction(self, abstr_type, epsilon, ignore_zeroes=False, threshold=1e-6):
		"""
		Create an abstraction out of the current q-table of the given type with given epsilon
		:return: new_abstr_mdp, a new abstract MDP made from the current q-table, with q-values informed by current
					q-table
		"""
		# Create a state abstraction based on the current q-table
		curr_q_table = self.get_q_table()
		new_abstr = make_abstr(curr_q_table, abstr_type, epsilon=epsilon, ignore_zeroes=ignore_zeroes, threshold=threshold)

		# Update agent's q-table for the new abstract states
		# For each new abstract state, average the state-action values of the constituent states and
		#  make that average the state-action value for the new abstract state
		new_q_table = defaultdict(lambda: 0.0)

		# Carry over all existing q-values unchanged
		for key, value in curr_q_table.items():
			new_q_table[key] = value

		# Get possible states of MDP
		possible_states = self.mdp.get_all_possible_states()

		# Make guess at new values for abstract states by averaging state-action values of constituent states
		#  Iterate through all unique abstract states (values in abstr_dict may repeat)
		for abstr_state in set(new_abstr.abstr_dict.values()):
			# For each action...
			for action in self.mdp.actions:
				action_val = 0
				map_count = 0
				# ...Get the states that are grouped together and average their state-action values for that action
				for ground_state in possible_states:
					if new_abstr.get_abstr_from_ground(ground_state).data == abstr_state:
						action_val += curr_q_table[(ground_state, action)]
						map_count += 1
				if map_count != 0:
					# Since abstr_state is just an integer, we have to make a State out of it
					new_q_table[(State(data=abstr_state, is_terminal=False), action)] = action_val / map_count

		# Assign this updated q-table to the agent's q-table
		self._q_table = new_q_table

		# Update the agent's MDP to be the AbstractMDP generated by combining the state abstraction with the current
		#  MDP
		new_abstr_mdp = AbstractMDP(self.mdp, new_abstr)
		self.mdp = new_abstr_mdp

		# Return number of abstract states and number of ground states mapped to abstract states
		#  (keys of abstr_dict are the ground states, so they are already unique)
		unique_abstr_states = set(new_abstr.abstr_dict.values())
		return len(unique_abstr_states), len(new_abstr.abstr_dict)
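
A hypothetical usage sketch (AbstractionAgent and its explore() loop are assumptions borrowed from the other examples here; only make_abstraction itself is defined above):

# Sketch: learn on the ground MDP for a while, then collapse the learned
# q-table into a Q*-style abstraction (AbstractionAgent and explore() are assumed)
agent = AbstractionAgent(GridWorldMDP(), epsilon=0.1, alpha=0.1)
for _ in range(1000):
    agent.explore()
num_abstract, num_ground = agent.make_abstraction(Abstr_type.Q_STAR, epsilon=0.01)
print(num_abstract, 'abstract states cover', num_ground, 'ground states')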
Code example #3
def apply_noise_from_distribution(ground_mdp,
                                  abstr_type,
                                  approximation_epsilon=0.0,
                                  distribution=None,
                                  distribution_parameters=None,
                                  per_state_distribution=None,
                                  per_state_parameters=None,
                                  seed=None):
    """
    Run value iteration on ground MDP to get true abstraction of given type. Then apply noise by sampling from given
    distribution and add the sampled value to the Q-values. Then create approximate abstraction by grouping together
    based on given epsilon
    :param ground_mdp: the ground mdp with no abstractions
    :param abstr_type: what type of abstraction is desired
    :param distribution: a scipy distribution
    :param distribution_parameters: a dictionary of parameters passed to the distribution when sampling
    :param approximation_epsilon: the epsilon used in making approximate abstractions
    :param per_state_distribution: dictionary mapping states to distributions
    :param per_state_parameters: dictionary mapping states to parameters used for their per-state distributions
    """
    # Get Q-table
    vi = ValueIteration(ground_mdp)
    vi.run_value_iteration()
    q_table = vi.get_q_table()

    # Apply noise sampled from the distribution to each Q-value
    for (state, action) in q_table:
        # If per-state distributions are provided, only the listed states receive noise
        if per_state_distribution:
            if state in per_state_distribution.keys():
                dist = per_state_distribution[state]
                args = per_state_parameters[state]
                noise = dist.rvs(**args)
                q_table[(state, action)] += noise
        # Otherwise apply the MDP-wide distribution
        else:
            noise = distribution.rvs(**distribution_parameters)
            q_table[(state, action)] += noise

    # Make new epsilon-approximate abstraction
    new_s_a = make_abstr(q_table,
                         abstr_type,
                         epsilon=approximation_epsilon,
                         combine_zeroes=True,
                         threshold=0.0,
                         seed=seed)

    # Create abstract MDP with this corrupted s_a
    corr_mdp = AbstractMDP(ground_mdp, new_s_a)

    return corr_mdp
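
For instance, one might corrupt a Q*-abstraction with zero-mean Gaussian noise (a sketch; GridWorldMDP and Abstr_type come from code example #5, and the parameter values are illustrative):

from scipy.stats import norm

# Sketch: add N(0, 0.1) noise to every Q-value, then regroup with epsilon = 0.05
corr_mdp = apply_noise_from_distribution(GridWorldMDP(),
                                         Abstr_type.Q_STAR,
                                         approximation_epsilon=0.05,
                                         distribution=norm,
                                         distribution_parameters={'loc': 0.0, 'scale': 0.1},
                                         seed=42)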
Code example #4
    def make_abstr_mdp(self, abstr_type, abstr_epsilon=0.0, seed=None):
        """
		Create an abstract MDP with the given abstraction type
		:param abstr_type: the type of abstraction
		:param abstr_epsilon: the epsilon threshold for approximate abstraction
		:return: abstr_mdp
		"""
        vi = ValueIteration(self)
        vi.run_value_iteration()
        q_table = vi.get_q_table()
        s_a = make_abstr(q_table, abstr_type, abstr_epsilon, seed=seed)
        abstr_mdp = AbstractMDP(self, s_a)
        return abstr_mdp
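
Usage is then a one-liner (a sketch, reusing the GridWorldMDP and Abstr_type names from code example #5):

# Sketch: build an a*-approximate abstraction directly from the ground MDP
abstr_mdp = GridWorldMDP().make_abstr_mdp(Abstr_type.A_STAR, abstr_epsilon=0.05)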
Code example #5
from MDP.ValueIterationClass import ValueIteration
from resources.AbstractionTypes import Abstr_type
from resources.AbstractionCorrupters import make_corruption
from resources.AbstractionMakers import make_abstr
# GridWorldMDP and AbstractMDP are also used below; their module paths are assumed
# here to mirror the package layout of the imports above
from MDP.AbstractMDPClass import AbstractMDP
from GridWorld.GridWorldMDPClass import GridWorldMDP

import numpy as np

# Number of states to corrupt
STATE_NUM = 20

# Create abstract MDP
mdp = GridWorldMDP()
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()
state_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
abstr_mdp = AbstractMDP(mdp, state_abstr)

# Randomly select our list of states and print them out
states_to_corrupt = np.random.choice(mdp.get_all_possible_states(),
                                     size=STATE_NUM,
                                     replace=False)
for state in states_to_corrupt:
    print(state)

# Create a corrupt MDP
corr_mdp = make_corruption(abstr_mdp, states_to_corrupt)

for state in states_to_corrupt:
    print(abstr_mdp.get_abstr_from_ground(state),
          corr_mdp.get_abstr_from_ground(state))
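
make_corruption also accepts an explicit reassignment dictionary, as in code example #1; a sketch of that variant (the chosen states are hypothetical placeholders):

# Sketch: corrupt specific states by explicitly reassigning them to target states
states = mdp.get_all_possible_states()
error_dict = {states[0]: states[-1]}  # hypothetical reassignment
corr_mdp = make_corruption(abstr_mdp, reassignment_dict=error_dict)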
Code example #6
    mdp = GridWorldMDP(slip_prob=0.0)
    # Run VI to get q-table
    vi = ValueIteration(mdp)
    vi.run_value_iteration()
    q_table = vi.get_q_table()
    # Make state abstractions
    q_star_abstr = make_abstr(q_table, Abstr_type.Q_STAR)
    a_star_abstr = make_abstr(q_table, Abstr_type.A_STAR)
    pi_star_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
    # Make abstract MDPs - NOTE THIS CLASS HAS BEEN DEPRECATED DO NOT USE
    q_mdp = AbstractGridWorldMDP(state_abstr=q_star_abstr)
    a_mdp = AbstractGridWorldMDP(state_abstr=a_star_abstr)
    pi_mdp = AbstractGridWorldMDP(state_abstr=pi_star_abstr)

    # AbstractMDP is the non-deprecated replacement for the AbstractGridWorldMDP class above
    q2_mdp = AbstractMDP(mdp, state_abstr=q_star_abstr)

    print("VALUE OF OPTIMAL POLICY")
    print_q_table(q_table)

    print("\n\n\nQ* ABSTR")
    print(q_star_abstr)
    #print(a_star_abstr)
    #print(pi_star_abstr)

    # Create agents on each of these MDPs
    ground_agent = Agent(mdp)
    q_agent = Agent(q_mdp)
    q2_agent = Agent(q2_mdp)
    a_agent = Agent(a_mdp)
    pi_agent = Agent(pi_mdp)
Code example #7
    def __init__(self,
                 mdp,
                 abstr_dicts=None,
                 num_corrupted_mdps=1,
                 num_agents=10,
                 num_episodes=100,
                 results_dir='exp_results/simple',
                 agent_exploration_epsilon=0.1,
                 agent_learning_rate=0.1,
                 detach_interval=None,
                 prevent_cycles=False,
                 variance_threshold=False,
                 reset_q_value=False):

        self.ground_mdp = mdp
        self.abstr_dicts = abstr_dicts
        self.num_agents = num_agents
        self.num_corrupted_mdps = num_corrupted_mdps
        self.num_episodes = num_episodes
        self.results_dir = results_dir
        self.agent_exploration_epsilon = agent_exploration_epsilon
        self.agent_learning_rate = agent_learning_rate
        self.detach_interval = detach_interval
        self.prevent_cycles = prevent_cycles
        self.variance_threshold = variance_threshold
        self.reset_q_value = reset_q_value

        # Run VI and get q-table. Used for graphing results
        vi = ValueIteration(mdp)
        vi.run_value_iteration()
        q_table = vi.get_q_table()
        self.vi_table = q_table
        self.vi = vi

        # This will hold all the agent ensembles, keyed by the kind of MDP they run on
        self.agents = {}

        # Create the corrupt MDPs from the provided abstraction dictionaries. Key is
        #  ('explicit errors', abstraction dict number, mdp number), value is the MDP itself
        self.corrupt_mdp_dict = {}
        if self.abstr_dicts is not None:
            if not os.path.exists(os.path.join(self.results_dir, 'corrupted')):
                os.makedirs(os.path.join(self.results_dir, 'corrupted'))
            for i in range(len(self.abstr_dicts)):
                abstr_dict = self.abstr_dicts[i]
                for j in range(self.num_corrupted_mdps):
                    # Make a state abstraction that corresponds to given abstraction dictionary
                    s_a = StateAbstraction(abstr_dict=abstr_dict, epsilon=0)
                    abstr_mdp = AbstractMDP(mdp, s_a)
                    self.corrupt_mdp_dict[('explicit errors', i,
                                           j)] = abstr_mdp

        # Create the agents on the ground MDP
        ground_agents = []
        for i in range(self.num_agents):
            temp_mdp = SimpleMDP()
            agent = AbstractionAgent(temp_mdp,
                                     epsilon=agent_exploration_epsilon,
                                     alpha=agent_learning_rate,
                                     decay_exploration=False)
            ground_agents.append(agent)
        self.agents['ground'] = ground_agents

        # Create agents on the corrupt MDPs
        self.corr_agents = {}
        for key in self.corrupt_mdp_dict.keys():
            corr_ensemble = []
            for i in range(self.num_agents):
                # Make an AbstractionAgent from the state abstraction corresponding to the abstract MDP
                temp_mdp = SimpleMDP()
                corr_mdp = copy.deepcopy(self.corrupt_mdp_dict[key])
                s_a = copy.deepcopy(corr_mdp.state_abstr)
                agent = AbstractionAgent(temp_mdp,
                                         s_a,
                                         epsilon=agent_exploration_epsilon,
                                         alpha=agent_learning_rate,
                                         decay_exploration=False)
                corr_ensemble.append(agent)
            self.corr_agents[key] = corr_ensemble

        # Create another set of agents that will run the detachment algorithm (relevant when detach_interval is set)
        self.corr_detach_agents = {}
        for key in self.corrupt_mdp_dict.keys():
            corr_ensemble = []
            for i in range(self.num_agents):
                print('making detach agent', i)
                temp_mdp = SimpleMDP()
                corr_mdp = copy.deepcopy(self.corrupt_mdp_dict[key])
                s_a = copy.deepcopy(corr_mdp.state_abstr)
                agent = AbstractionAgent(temp_mdp,
                                         s_a,
                                         epsilon=agent_exploration_epsilon,
                                         alpha=agent_learning_rate,
                                         decay_exploration=False)
                corr_ensemble.append(agent)
            self.corr_detach_agents[key] = corr_ensemble
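
A hypothetical instantiation (CorruptionExperiment is a placeholder name for the class whose __init__ is shown above, and the degenerate abstraction dictionary is an illustrative stand-in for a real state-to-abstract-state mapping):

# Sketch: run an ensemble over one explicitly corrupted abstraction
# (CorruptionExperiment is a hypothetical class name; mapping every state to
#  abstract state 0 is a deliberately degenerate placeholder abstraction)
abstr_dict = {state: 0 for state in SimpleMDP().get_all_possible_states()}
exp = CorruptionExperiment(SimpleMDP(),
                           abstr_dicts=[abstr_dict],
                           num_corrupted_mdps=2,
                           num_agents=5,
                           num_episodes=200)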