コード例 #1
0
def test_udm(mdp, abstr_type, num_episodes, error_dict=None, episode_buffer=0):
    """
    Run a UDM agent on the given MDP and print a report of its state splits.

    The MDP is abstracted with ``abstr_type`` (optionally corrupted via
    ``error_dict``), a ``UDMAgent`` explores it until ``num_episodes``
    episodes have been counted, and the split test results are printed
    per abstract state. Nothing is returned; all output goes to stdout.

    Args:
        mdp: Ground MDP; must provide ``make_abstr_mdp``.
        abstr_type: Abstraction type passed to ``mdp.make_abstr_mdp``.
        num_episodes: Exploration continues while the agent's
            ``episode_count`` is below this value.
        error_dict: Optional mapping passed to ``make_corruption`` as
            ``reassignment_dict``. When falsy (``None`` or empty) no
            corruption is applied and the final error report is skipped.
        episode_buffer: Forwarded unchanged to ``UDMAgent``.
    """
    # Make abstract MDP
    abstr_mdp = mdp.make_abstr_mdp(abstr_type)
    # Apply corruption if argument is provided; the corrupted state
    # abstraction replaces the clean one for the rest of the run
    if error_dict:
        c_s_a = make_corruption(abstr_mdp, reassignment_dict=error_dict)
        abstr_mdp = AbstractMDP(mdp, c_s_a)
    agent = UDMAgent(mdp,
                     s_a=abstr_mdp.state_abstr,
                     transition_threshold=TRANSITION_THRESHOLD,
                     episode_buffer=episode_buffer)
    # Print abstraction
    print('State abstraction is')
    print(agent.get_abstraction_as_string())

    # Explore the number of episodes
    while agent.episode_count < num_episodes:
        agent.explore()
        # Report the step count of the most recently recorded episode
        if agent.episode_count > 0 and agent.end_of_episode_record[-1] == 1:
            print('On episode', agent.episode_count,
                  agent.step_count_record[agent.episode_count - 1])

    # Calculate state splits based on UDM algorithm
    transition_records = agent.get_all_transition_returns()
    split_record = agent.test_all_states_for_split(transition_records)

    # Report each abstract state once, the first time one of its ground
    # states is encountered
    abstr_states_seen = []
    for ground_state in agent.mdp.get_all_possible_states():
        abstr_state = agent.get_abstr_from_ground(ground_state)
        if abstr_state in abstr_states_seen:
            continue

        print('Overlapping states for', abstr_state, end=' ')
        ground_states = agent.get_ground_states_from_abstract_state(
            abstr_state)
        for ground in ground_states:
            print(ground, end=' ')
        print()
        agent.print_action_to_state_list(
            agent.test_state_for_split(transition_records[abstr_state]))
        abstr_states_seen.append(abstr_state)

    print("About to print")
    print(split_record)
    memory_record = split_record_to_additional_states(split_record)
    print_memory_record(memory_record)

    if error_dict:
        for key, value in error_dict.items():
            corr_abstr_state = abstr_mdp.get_abstr_from_ground(value)
            print('Error state', key, 'mapped to', value, 'Abstr state',
                  corr_abstr_state)
            print('States in corr abstr state are', end=' ')
            for state in abstr_mdp.get_ground_from_abstr(corr_abstr_state):
                print(state, end=' ')
            # Terminate the line so consecutive error records do not run
            # together on one line
            print()
コード例 #2
0
from MDP.ValueIterationClass import ValueIteration
from resources.AbstractionTypes import Abstr_type
from resources.AbstractionCorrupters import make_corruption
from resources.AbstractionMakers import make_abstr

import numpy as np

# How many ground states get sampled for corruption
STATE_NUM = 20

# Solve the ground MDP with value iteration, then build the abstract MDP
# from the resulting Q-table using the PI_STAR abstraction type
mdp = GridWorldMDP()
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()
state_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
abstr_mdp = AbstractMDP(mdp, state_abstr)

# Draw STATE_NUM distinct states (sampling without replacement) and list
# them one per line
states_to_corrupt = np.random.choice(
    mdp.get_all_possible_states(),
    replace=False,
    size=STATE_NUM,
)
print(*states_to_corrupt, sep='\n')

# Corrupt the abstraction for the sampled states
# NOTE(review): the result is used below as an object exposing
# get_abstr_from_ground -- confirm that is what make_corruption returns
corr_mdp = make_corruption(abstr_mdp, states_to_corrupt)

# Show, for every sampled state, its abstract state before and after
# corruption
for state in states_to_corrupt:
    original_abstr = abstr_mdp.get_abstr_from_ground(state)
    corrupted_abstr = corr_mdp.get_abstr_from_ground(state)
    print(original_abstr, corrupted_abstr)