def test_udm(mdp, abstr_type, num_episodes, error_dict=None, episode_buffer=0):
    """
    Run a UDM agent on ``mdp`` and print the state splits it proposes.

    The function builds an abstract MDP from ``mdp``, optionally corrupts its
    state abstraction, lets a ``UDMAgent`` explore for ``num_episodes``
    episodes, and then prints the split results. Everything is reported via
    ``print``; nothing is returned.

    :param mdp: ground MDP; must provide ``make_abstr_mdp``.
    :param abstr_type: abstraction type forwarded to ``mdp.make_abstr_mdp``.
    :param num_episodes: exploration continues while the agent's
        ``episode_count`` is below this value.
    :param error_dict: optional mapping handed to ``make_corruption`` as
        ``reassignment_dict``. Presumably maps a ground state to the ground
        state whose abstract state it should be moved into -- confirm against
        ``make_corruption``. Falsy values (``None``, ``{}``) disable corruption.
    :param episode_buffer: forwarded unchanged to ``UDMAgent``.
    :return: None
    """
    # Build the abstraction, swapping in the corrupted one when requested.
    abstr_mdp = mdp.make_abstr_mdp(abstr_type)
    if error_dict:
        corrupted_abstraction = make_corruption(abstr_mdp,
                                                reassignment_dict=error_dict)
        abstr_mdp = AbstractMDP(mdp, corrupted_abstraction)

    agent = UDMAgent(
        mdp,
        s_a=abstr_mdp.state_abstr,
        transition_threshold=TRANSITION_THRESHOLD,
        episode_buffer=episode_buffer,
    )

    # Show the abstraction the agent starts from.
    print('State abstraction is')
    print(agent.get_abstraction_as_string())

    # Explore until the requested number of episodes has been reached.
    while agent.episode_count < num_episodes:
        agent.explore()
        # Log only when the latest end-of-episode flag is set (presumably the
        # step that just ran closed an episode -- confirm in UDMAgent).
        log_progress = (agent.episode_count > 0
                        and agent.end_of_episode_record[-1] == 1)
        if log_progress:
            print('On episode',
                  agent.episode_count,
                  agent.step_count_record[agent.episode_count - 1])

    # Compute the splits, then report once per distinct abstract state.
    transition_records = agent.get_all_transition_returns()
    split_record = agent.test_all_states_for_split(transition_records)

    reported = []
    for ground_state in agent.mdp.get_all_possible_states():
        abstr_state = agent.get_abstr_from_ground(ground_state)
        if abstr_state in reported:
            continue

        print('Overlapping states for', abstr_state, end=' ')
        members = agent.get_ground_states_from_abstract_state(abstr_state)
        # Each member is followed by one space; the newline ends the row.
        print(''.join(str(member) + ' ' for member in members))

        state_split = agent.test_state_for_split(transition_records[abstr_state])
        agent.print_action_to_state_list(state_split)
        reported.append(abstr_state)

    print("About to print")
    #count_number_of_splits(split_record)
    print(split_record)
    print_memory_record(split_record_to_additional_states(split_record))

    # The remaining report only applies when a corruption was requested.
    if not error_dict:
        return

    for key, value in error_dict.items():
        print('Error state', key, 'mapped to', value, 'Abstr state',
              abstr_mdp.get_abstr_from_ground(value))
        print('States in corr abstr state are', end=' ')
        corrupted_group = abstr_mdp.get_ground_from_abstr(
            abstr_mdp.get_abstr_from_ground(value))
        # NOTE(review): no newline is written after the group, so successive
        # entries continue on the same output line -- confirm this is intended.
        print(''.join(str(state) + ' ' for state in corrupted_group), end='')
import numpy as np

from MDP.ValueIterationClass import ValueIteration
from resources.AbstractionTypes import Abstr_type
from resources.AbstractionCorrupters import make_corruption
from resources.AbstractionMakers import make_abstr

# NOTE(review): GridWorldMDP and AbstractMDP are used below but are not
# imported in the visible part of this file -- confirm they are in scope.

# How many ground states are picked for corruption.
STATE_NUM = 20

# Run value iteration on the grid world and build a PI_STAR abstraction from
# the resulting Q-table.
mdp = GridWorldMDP()
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()
state_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
abstr_mdp = AbstractMDP(mdp, state_abstr)

# Draw STATE_NUM distinct ground states at random and list them.
all_ground_states = mdp.get_all_possible_states()
states_to_corrupt = np.random.choice(all_ground_states,
                                     size=STATE_NUM,
                                     replace=False)
for state in states_to_corrupt:
    print(state)

# Corrupt the abstraction for the chosen states, then print, per state, the
# abstract state before and after corruption.
corr_mdp = make_corruption(abstr_mdp, states_to_corrupt)
for state in states_to_corrupt:
    original_abstr = abstr_mdp.get_abstr_from_ground(state)
    corrupted_abstr = corr_mdp.get_abstr_from_ground(state)
    print(original_abstr, corrupted_abstr)