def test_udm(mdp, abstr_type, num_episodes, error_dict=None, episode_buffer=0):
    """
    Test how UDM performs on the given MDP.
    """
    # Make abstract MDP
    abstr_mdp = mdp.make_abstr_mdp(abstr_type)

    # Apply corruption if an error dictionary is provided
    if error_dict:
        c_s_a = make_corruption(abstr_mdp, reassignment_dict=error_dict)
        abstr_mdp = AbstractMDP(mdp, c_s_a)

    agent = UDMAgent(mdp, s_a=abstr_mdp.state_abstr,
                     transition_threshold=TRANSITION_THRESHOLD,
                     episode_buffer=episode_buffer)

    # Print abstraction
    print('State abstraction is')
    print(agent.get_abstraction_as_string())

    # Explore for the given number of episodes
    while agent.episode_count < num_episodes:
        agent.explore()
        if agent.episode_count > 0 and agent.end_of_episode_record[-1] == 1:
            print('On episode', agent.episode_count,
                  agent.step_count_record[agent.episode_count - 1])

    # Calculate state splits based on the UDM algorithm
    transition_records = agent.get_all_transition_returns()
    split_record = agent.test_all_states_for_split(transition_records)

    # For each abstract state, print the ground states grouped into it and the
    # per-action split results
    abstr_states_seen = []
    for ground_state in agent.mdp.get_all_possible_states():
        abstr_state = agent.get_abstr_from_ground(ground_state)
        if abstr_state not in abstr_states_seen:
            print('Overlapping states for', abstr_state, end=' ')
            ground_states = agent.get_ground_states_from_abstract_state(abstr_state)
            for ground in ground_states:
                print(ground, end=' ')
            print()
            agent.print_action_to_state_list(
                agent.test_state_for_split(transition_records[abstr_state]))
            abstr_states_seen.append(abstr_state)

    print(split_record)
    memory_record = split_record_to_additional_states(split_record)
    print_memory_record(memory_record)

    # If errors were applied, show how each corrupted state was remapped and
    # which ground states share its (corrupted) abstract state
    if error_dict:
        for key, value in error_dict.items():
            print('Error state', key, 'mapped to', value,
                  'Abstr state', abstr_mdp.get_abstr_from_ground(value))
            print('States in corr abstr state are', end=' ')
            group = abstr_mdp.get_ground_from_abstr(
                abstr_mdp.get_abstr_from_ground(value))
            for state in group:
                print(state, end=' ')
            print()
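# Hedged usage sketch for test_udm, not taken from the source. GridWorldMDP
# and Abstr_type appear elsewhere in this repo; the error_dict contents below
# are hypothetical, and TRANSITION_THRESHOLD must be defined at module level
# (the function body requires it).
mdp = GridWorldMDP()
states = mdp.get_all_possible_states()
# Hypothetical corruption: reassign the first ground state into the last
# state's group, matching how test_udm reads error_dict (key -> new state)
error_dict = {states[0]: states[-1]}
test_udm(mdp, Abstr_type.Q_STAR, num_episodes=500,
         error_dict=error_dict, episode_buffer=10)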
def make_abstraction(self, abstr_type, epsilon, ignore_zeroes=False, threshold=1e-6):
    """
    Create an abstraction of the given type from the agent's current q-table
    with the given epsilon, and point the agent at the resulting AbstractMDP.

    :return: the number of abstract states and the number of ground states
        mapped to abstract states
    """
    # Create a state abstraction based on the current q-table
    curr_q_table = self.get_q_table()
    new_abstr = make_abstr(curr_q_table, abstr_type, epsilon=epsilon,
                           ignore_zeroes=ignore_zeroes, threshold=threshold)

    # Update the agent's q-table for the new abstract states. For each new
    # abstract state, average the state-action values of its constituent
    # states and use that average as the abstract state's state-action value.
    new_q_table = defaultdict(lambda: 0.0)

    # All old values stay the same
    for key, value in curr_q_table.items():
        new_q_table[key] = value

    # Get possible states of the MDP
    possible_states = self.mdp.get_all_possible_states()

    # Estimate values for abstract states by averaging the state-action values
    # of their constituent states. Iterating over a set avoids recomputing the
    # same abstract state once per constituent ground state.
    for abstr_state in set(new_abstr.abstr_dict.values()):
        for action in self.mdp.actions:
            action_val = 0
            map_count = 0
            # Average the values of the ground states grouped into this
            # abstract state
            for ground_state in possible_states:
                if new_abstr.get_abstr_from_ground(ground_state).data == abstr_state:
                    action_val += curr_q_table[(ground_state, action)]
                    map_count += 1
            if map_count != 0:
                # abstr_state is just an integer, so wrap it in a State
                new_q_table[(State(data=abstr_state, is_terminal=False), action)] = \
                    action_val / map_count

    # Assign the updated q-table to the agent
    self._q_table = new_q_table

    # Update the agent's MDP to the AbstractMDP produced by combining the new
    # state abstraction with the current MDP
    self.mdp = AbstractMDP(self.mdp, new_abstr)

    # Return the number of abstract states and the number of ground states
    # mapped to abstract states (abstr_dict keys are already unique)
    ground_states = list(new_abstr.abstr_dict.keys())
    unique_abstr_states = set(new_abstr.abstr_dict.values())
    return len(unique_abstr_states), len(ground_states)
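# Hedged sketch of the online-abstraction workflow this method supports,
# assuming it lives on AbstractionAgent (used elsewhere in this repo) and that
# the agent exposes explore(), as the UDM agent above does.
agent = AbstractionAgent(GridWorldMDP(), epsilon=0.1, alpha=0.1)
for _ in range(10000):
    agent.explore()
num_abstr, num_ground = agent.make_abstraction(Abstr_type.Q_STAR, epsilon=0.05)
print(num_abstr, 'abstract states cover', num_ground, 'ground states')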
def apply_noise_from_distribution(ground_mdp, abstr_type, approximation_epsilon=0.0,
                                  distribution=None, distribution_parameters=None,
                                  per_state_distribution=None, per_state_parameters=None,
                                  seed=None):
    """
    Run value iteration on the ground MDP to get a true abstraction of the
    given type. Then apply noise to the Q-values by sampling from the given
    distribution, and create an approximate abstraction by grouping states
    together according to the given epsilon.

    :param ground_mdp: the ground MDP with no abstractions
    :param abstr_type: the type of abstraction desired
    :param approximation_epsilon: the epsilon used in making approximate abstractions
    :param distribution: a scipy distribution
    :param distribution_parameters: a dictionary of parameters passed to the
        distribution when sampling
    :param per_state_distribution: a dictionary mapping states to distributions
        that override the MDP-wide distribution
    :param per_state_parameters: a dictionary mapping states to the parameters
        used for their per-state distributions
    :param seed: random seed passed to the abstraction maker
    :return: corr_mdp, an AbstractMDP built from the noisy Q-table
    """
    # Get the Q-table
    vi = ValueIteration(ground_mdp)
    vi.run_value_iteration()
    q_table = vi.get_q_table()

    # Apply noise sampled from the distribution to the Q-table
    for (state, action), value in q_table.items():
        # If there is a specific per-state distribution for this state, apply it
        if per_state_distribution and state in per_state_distribution.keys():
            dist = per_state_distribution[state]
            args = per_state_parameters[state]
            q_table[(state, action)] += dist.rvs(**args)
        # Otherwise apply the MDP-wide distribution
        else:
            q_table[(state, action)] += distribution.rvs(**distribution_parameters)

    # Make the new epsilon-approximate abstraction
    new_s_a = make_abstr(q_table, abstr_type, epsilon=approximation_epsilon,
                         combine_zeroes=True, threshold=0.0, seed=seed)

    # Create an abstract MDP with this corrupted state abstraction
    corr_mdp = AbstractMDP(ground_mdp, new_s_a)
    return corr_mdp
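# Illustrative call using scipy.stats, whose frozen distributions expose the
# rvs(**kwargs) interface this function samples from. The per-state override
# for states[0] is a hypothetical example, not from the source.
from scipy.stats import norm, cauchy

mdp = GridWorldMDP()
states = mdp.get_all_possible_states()
corr_mdp = apply_noise_from_distribution(
    mdp, Abstr_type.Q_STAR,
    approximation_epsilon=0.01,
    distribution=norm,
    distribution_parameters={'loc': 0.0, 'scale': 0.1},
    per_state_distribution={states[0]: cauchy},
    per_state_parameters={states[0]: {'loc': 0.0, 'scale': 0.05}},
    seed=42)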
def make_abstr_mdp(self, abstr_type, abstr_epsilon=0.0, seed=None):
    """
    Create an abstract MDP with the given abstraction type.

    :param abstr_type: the type of abstraction
    :param abstr_epsilon: the epsilon threshold for approximate abstraction
    :param seed: random seed passed to the abstraction maker
    :return: abstr_mdp
    """
    # Run value iteration to get the true Q-table, then abstract over it
    vi = ValueIteration(self)
    vi.run_value_iteration()
    q_table = vi.get_q_table()
    s_a = make_abstr(q_table, abstr_type, abstr_epsilon, seed=seed)
    abstr_mdp = AbstractMDP(self, s_a)
    return abstr_mdp
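# Minimal usage sketch, consistent with the scripts elsewhere in this repo
# (assumes GridWorldMDP inherits this method from the base MDP class):
abstr_mdp = GridWorldMDP().make_abstr_mdp(Abstr_type.PI_STAR, abstr_epsilon=0.0)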
# NOTE: the GridWorldMDP and AbstractMDP import paths below are assumed from
# the repo layout; adjust if the modules live elsewhere.
from GridWorld.GridWorldMDPClass import GridWorldMDP
from MDP.AbstractMDPClass import AbstractMDP
from MDP.ValueIterationClass import ValueIteration
from resources.AbstractionTypes import Abstr_type
from resources.AbstractionCorrupters import make_corruption
from resources.AbstractionMakers import make_abstr
import numpy as np

# Number of states to corrupt
STATE_NUM = 20

# Create an abstract MDP
mdp = GridWorldMDP()
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()
state_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
abstr_mdp = AbstractMDP(mdp, state_abstr)

# Randomly select the states to corrupt and print them
states_to_corrupt = np.random.choice(mdp.get_all_possible_states(),
                                     size=STATE_NUM, replace=False)
for state in states_to_corrupt:
    print(state)

# Create a corrupt MDP and compare original vs. corrupted abstract mappings
corr_mdp = make_corruption(abstr_mdp, states_to_corrupt)
for state in states_to_corrupt:
    print(abstr_mdp.get_abstr_from_ground(state),
          corr_mdp.get_abstr_from_ground(state))
mdp = GridWorldMDP(slip_prob=0.0)

# Run VI to get the q-table
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()

# Make state abstractions
q_star_abstr = make_abstr(q_table, Abstr_type.Q_STAR)
a_star_abstr = make_abstr(q_table, Abstr_type.A_STAR)
pi_star_abstr = make_abstr(q_table, Abstr_type.PI_STAR)

# Make abstract MDPs.
# NOTE: AbstractGridWorldMDP is deprecated; do not use it in new code.
q_mdp = AbstractGridWorldMDP(state_abstr=q_star_abstr)
a_mdp = AbstractGridWorldMDP(state_abstr=a_star_abstr)
pi_mdp = AbstractGridWorldMDP(state_abstr=pi_star_abstr)

# AbstractMDP is the replacement for the deprecated class
q2_mdp = AbstractMDP(mdp, state_abstr=q_star_abstr)

print("VALUE OF OPTIMAL POLICY")
print_q_table(q_table)

print("\n\n\nQ* ABSTR")
print(q_star_abstr)

# Create agents on each of these MDPs
ground_agent = Agent(mdp)
q_agent = Agent(q_mdp)
q2_agent = Agent(q2_mdp)
a_agent = Agent(a_mdp)
pi_agent = Agent(pi_mdp)
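# Hedged sketch: train the ground agent and the AbstractMDP-backed agent, then
# compare their learned values. explore() and get_q_table() are assumed to be
# part of the Agent API, matching how agents are driven elsewhere in this repo.
for agent in (ground_agent, q2_agent):
    for _ in range(5000):
        agent.explore()
    print_q_table(agent.get_q_table())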
def __init__(self, mdp, abstr_dicts=None, num_corrupted_mdps=1, num_agents=10,
             num_episodes=100, results_dir='exp_results/simple',
             agent_exploration_epsilon=0.1, agent_learning_rate=0.1,
             detach_interval=None, prevent_cycles=False,
             variance_threshold=False, reset_q_value=False):
    self.ground_mdp = mdp
    self.abstr_dicts = abstr_dicts
    self.num_agents = num_agents
    self.num_corrupted_mdps = num_corrupted_mdps
    self.num_episodes = num_episodes
    self.results_dir = results_dir
    self.agent_exploration_epsilon = agent_exploration_epsilon
    self.agent_learning_rate = agent_learning_rate
    self.detach_interval = detach_interval
    self.prevent_cycles = prevent_cycles
    self.variance_threshold = variance_threshold
    self.reset_q_value = reset_q_value

    # Run VI and get the q-table; used for graphing results
    vi = ValueIteration(mdp)
    vi.run_value_iteration()
    self.vi_table = vi.get_q_table()
    self.vi = vi

    # Ensembles of agents. The ground ensemble is keyed by 'ground'; the
    # corrupt ensembles below are keyed by ('explicit errors', abstraction
    # dict number, mdp number).
    self.agents = {}

    # Create the corrupt MDPs from the provided abstraction dictionaries
    self.corrupt_mdp_dict = {}
    if self.abstr_dicts is not None:
        if not os.path.exists(os.path.join(self.results_dir, 'corrupted')):
            os.makedirs(os.path.join(self.results_dir, 'corrupted'))
        for i in range(len(self.abstr_dicts)):
            abstr_dict = self.abstr_dicts[i]
            for j in range(self.num_corrupted_mdps):
                # Make a state abstraction corresponding to the given
                # abstraction dictionary
                s_a = StateAbstraction(abstr_dict=abstr_dict, epsilon=0)
                abstr_mdp = AbstractMDP(mdp, s_a)
                self.corrupt_mdp_dict[('explicit errors', i, j)] = abstr_mdp

    # Create the agents on the ground MDP
    ground_agents = []
    for i in range(self.num_agents):
        agent = AbstractionAgent(SimpleMDP(), epsilon=agent_exploration_epsilon,
                                 alpha=agent_learning_rate,
                                 decay_exploration=False)
        ground_agents.append(agent)
    self.agents['ground'] = ground_agents

    # Create agents on the corrupt MDPs. Each agent gets its own deep copy of
    # the state abstraction so the ensemble members are independent.
    self.corr_agents = {}
    for key in self.corrupt_mdp_dict.keys():
        corr_ensemble = []
        for i in range(self.num_agents):
            s_a = copy.deepcopy(self.corrupt_mdp_dict[key]).state_abstr
            agent = AbstractionAgent(SimpleMDP(), s_a,
                                     epsilon=agent_exploration_epsilon,
                                     alpha=agent_learning_rate,
                                     decay_exploration=False)
            corr_ensemble.append(agent)
        self.corr_agents[key] = corr_ensemble

    # Create another set of agents that will run the detachment algorithm
    # (exercised when detach_interval is set)
    self.corr_detach_agents = {}
    for key in self.corrupt_mdp_dict.keys():
        corr_ensemble = []
        for i in range(self.num_agents):
            s_a = copy.deepcopy(self.corrupt_mdp_dict[key]).state_abstr
            agent = AbstractionAgent(SimpleMDP(), s_a,
                                     epsilon=agent_exploration_epsilon,
                                     alpha=agent_learning_rate,
                                     decay_exploration=False)
            corr_ensemble.append(agent)
        self.corr_detach_agents[key] = corr_ensemble
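# Hypothetical instantiation (the enclosing class name is not shown in this
# excerpt; 'Experiment' is a stand-in). Each abstr_dicts entry maps ground
# states to abstract-state labels, matching StateAbstraction(abstr_dict=...).
states = SimpleMDP().get_all_possible_states()
abstr_dict = {state: 0 for state in states}  # collapse everything to one label
exp = Experiment(SimpleMDP(), abstr_dicts=[abstr_dict],
                 num_corrupted_mdps=2, num_agents=5, num_episodes=200,
                 detach_interval=50)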