Code example #1
def test_detach_inconsistent_states(abstr_type):
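    """
    Train an AbstractionAgent on an abstraction of the given type for 1,000,000
    exploration steps, then detach whatever ground states
    detach_inconsistent_states flags as inconsistent with their abstract state.
    """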
    mdp = GridWorldMDP()
    abstr_mdp = mdp.make_abstr_mdp(abstr_type)
    agent = AbstractionAgent(mdp, s_a=abstr_mdp.state_abstr)
    for i in range(1000000):
        if i % 1000 == 0:
            print('On step', i)
        agent.explore()
    error_states = agent.detach_inconsistent_states(verbose=True)
Code example #2
def test_get_ground_states_from_abstract_state():
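    """
    Build a Pi* abstraction of GridWorld and, for each abstract state, print the
    ground states that map to it.
    """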
    mdp = GridWorldMDP()
    abstr_mdp = mdp.make_abstr_mdp(Abstr_type.PI_STAR)
    agent = AbstractionAgent(mdp, s_a=abstr_mdp.state_abstr)

    for value in set(agent.s_a.abstr_dict.values()):
        print(value, end=' ')
        ground_states = agent.get_ground_states_from_abstract_state(value)
        for state in ground_states:
            print(state, end=' ')
        print()
Code example #3
def test_check_abstract_state_consistency():
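    """
    Train an AbstractionAgent on an A* abstraction for 100,000 exploration steps,
    then run check_abstract_state_consistency on every abstract state.
    """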
    mdp = GridWorldMDP()
    abstr_mdp = mdp.make_abstr_mdp(Abstr_type.A_STAR)
    agent = AbstractionAgent(mdp, s_a=abstr_mdp.state_abstr)
    for i in range(100000):
        if i % 1000 == 0:
            print('On step', i)
        agent.explore()
    # Get all abstract states
    abstr_states = agent.get_abstract_states()
    for abstr_state in abstr_states:
        agent.check_abstract_state_consistency(abstr_state, verbose=True)
Code example #4
def test_rollout_adjustment(key):
    """
    Train the agent on a state abstraction with fatal errors. Then generate a roll-out, detach the first state that's
    part of a cycle, and restart learning.
    """
    # Load a poorly-performing abstraction
    names = ['AbstrType', 'AbstrEps', 'CorrType', 'CorrProp', 'Batch', 'Dict']
    df = pd.read_csv('../abstr_exp/corrupted/corrupted_abstractions.csv', names=names)
    abstr_string = df.loc[(df['AbstrType'] == str(key[0]))
                        & (df['AbstrEps'] == key[1])
                        & (df['CorrType'] == str(key[2]))
                        & (df['CorrProp'] == key[3])
                        & (df['Batch'] == key[4])]['Dict'].values[0]
    abstr_list = ast.literal_eval(abstr_string)
    abstr_dict = {}
    for el in abstr_list:
        is_term = el[0][0] == 11 and el[0][1] == 11
        state = GridWorldState(el[0][0], el[0][1], is_terminal=is_term)
        abstr_dict[state] = el[1]

    # Create an agent with this abstraction
    s_a = StateAbstraction(abstr_dict, abstr_type=Abstr_type.PI_STAR)
    mdp = GridWorldMDP()
    agent = AbstractionAgent(mdp, s_a=s_a)

    # Keep an untrained copy so the adjusted run below starts from the same abstraction
    agent2 = copy.deepcopy(agent)

    # Generate roll-outs from the unadjusted agent after 5,000 and then 10,000 steps
    for i in range(5000):
        agent.explore()
    rollout = agent.generate_rollout()
    print('Roll-out for model with no adjustment, 5,000 steps')
    for state in rollout:
        print(state, end=', ')
    for i in range(5000):
        agent.explore()
    rollout = agent.generate_rollout()
    print('Roll-out for model with no adjustment, 10,000 steps')
    for state in rollout:
        print(state, end=', ')
    print('\n')

    # Train an agent for 5000 steps, detach the first state in the cycle, and train for another 5000 steps
    #  The hope is that this will get further than the 10000 step one
    for i in range(5000):
        agent2.explore()
    rollout = agent2.generate_rollout()
    print('Roll-out for model pre-adjustment, 5,000 steps')
    for state in rollout:
        print(state, end=', ')
    print()
    print('Detaching state', rollout[-1])
    agent2.detach_state(rollout[-1])
    for i in range(5000):
        agent2.explore()
    rollout = agent2.generate_rollout()
    print('Roll-out for model post-adjustment, 10,000 steps')
    for state in rollout:
        print(state, end=', ')
Code example #5
def test_gridworld():
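    """
    Run an ensemble experiment on GridWorld (ground agents plus Q*, A*, and Pi*
    abstractions), then plot the reward and step-count results.
    """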
    mdp = GridWorldMDP()
    mdp_str = 'rooms'
    #mdp = TaxiMDP()
    #mdp_str = 'taxi'

    eps = 0.0

    abstr_epsilon_list = [(Abstr_type.Q_STAR, eps), (Abstr_type.A_STAR, eps),
                          (Abstr_type.PI_STAR, eps)]
    #abstr_epsilon_list = [(Abstr_type.A_STAR, eps), (Abstr_type.PI_STAR, eps)]
    exp = Experiment(mdp, num_agents=20, abstr_epsilon_list=abstr_epsilon_list)

    # Testing that one agent in an ensemble acting on its MDP won't affect another agent
    '''
    print(exp)
    for agent in exp.agents['ground']:
        print(agent.mdp)
    print()
    for i in range(100):
        exp.agents['ground'][0].explore()
    for agent in exp.agents['ground']:
        print(agent.mdp)
    '''

    # Testing run_trajectory
    '''
    for i in range(20):
        actual, optimal = exp.run_trajectory(exp.agents['ground'][0])
        print(actual, optimal)

    print('\n\n\n')

    for i in range(20):
        actual, optimal = exp.run_trajectory(exp.agents[(Abstr_type.PI_STAR, 0.0)][0])
        print(actual, optimal)
    '''

    # Testing run_ensemble
    #print(exp.run_ensemble(exp.agents[(Abstr_type.Q_STAR, 0.0)]))

    # Testing writing to file
    data, steps = exp.run_all_ensembles(num_episodes=500)

    # Testing plotting results
    exp.visualize_results(
        data, 'results/exp_graph_' + mdp_str + '_' + str(eps) + '.png')

    exp.visualize_results(
        steps, 'results/step_counts_' + mdp_str + '_' + str(eps) + '.png')
Code example #6
def test_check_for_optimal_action_and_value(states, num_steps):
    """
    Create a list of actions generated by following the greedy policy, starting at the given state
    """
    mdp = GridWorldMDP()
    abstr_mdp = mdp.make_abstr_mdp(Abstr_type.Q_STAR)
    agent = AbstractionAgent(mdp, s_a=abstr_mdp.state_abstr)
    for i in range(num_steps):
        if i % 1000 == 0:
            print('On step', i)
        agent.explore()

    # print(agent.get_learned_policy_as_string())
    policy = agent.get_learned_policy()
    #for key, value in agent.get_learned_policy_as_string().items():
    #    print(key, value, agent.get_q_value(key[0], key[1]))
    for s in agent.mdp.get_all_possible_states():
        #for a in agent.mdp.actions:
        print(s, agent.get_best_action_value(s))

    for state in states:
        mdp_state = GridWorldState(state[0], state[1])
        action, value = agent.check_for_optimal_action_value_next_state(mdp_state, verbose=True)
        print()
Code example #7
def test_fourrooms(abstr_type, noise=0.0):
    """
    Test the corruption of an abstraction of the given type in FourRooms
    :param abstr_type: the type of abstraction to be tested
    :param noise: the proportion of states to be scrambled
    :return:
    """
    # Make a grid world MDP and create an abstraction of the given type from it
    mdp = GridWorldMDP()
    vi = ValueIteration(mdp)
    vi.run_value_iteration()
    q_table = vi.get_q_table()
    true_abstr = make_abstr(q_table, abstr_type=abstr_type)

    # Corrupt the true abstraction by scrambling the given proportion of states
    corrupt_results = uniform_random(true_abstr, proportion=noise)

    true_dict = true_abstr.get_abstr_dict()
    corrupt_dict = corrupt_results.get_abstr_dict()

    for key in true_dict.keys():
        if true_dict[key] != corrupt_dict[key]:
            print(key, true_dict[key], corrupt_dict[key])
Code example #8
def main():

    # Testing what a Q* abstraction looks like in
    # four rooms

    # Make MDP and train an agent in it
    grid_mdp = GridWorldMDP(height=9, width=9, slip_prob=0.0, gamma=0.99)
    agent = Agent(grid_mdp)

    # Train the agent for 100,000 steps
    trajectory = []
    for i in range(100000):
        if i % 1000 == 0:
            print("epsilon, alpha:", agent._epsilon, agent._alpha)
        current_state, action, next_state, _ = agent.explore()
        trajectory.append(current_state)

    already_printed = []
    for state in trajectory:
        if state not in already_printed:
            already_printed.append(state)

    # Print the action values learned at each state
    for state in already_printed:
        print("values learned at state", state)
        print_action_values(agent.get_action_values(state))
        print()

    # Make an abstraction from the agent's q-table
    state_abstr = make_abstr(agent.get_q_table(),
                             Abstr_type.Q_STAR,
                             epsilon=0.05)
    print(state_abstr)

    # Testing that Pi* abstraction works
    '''
	# Create toy q_table to build abstraction from 
	q_table = {(GridWorldState(1,1), Dir.UP): 0.9,
				(GridWorldState(1,1), Dir.DOWN): 0.8,
				(GridWorldState(1,1), Dir.LEFT): 0.7,
				(GridWorldState(1,1), Dir.RIGHT): 0.6,

				# Same optimal action and action value as (1,1)
				(GridWorldState(1,2), Dir.UP): 0.9,
				(GridWorldState(1,2), Dir.DOWN): 0.0,
				(GridWorldState(1,2), Dir.LEFT): 0.2,
				(GridWorldState(1,2), Dir.RIGHT): 0.5,

				# val(UP) = 0.9 but val(DOWN) = 0.91
				(GridWorldState(2,2), Dir.UP): 0.9,
				(GridWorldState(2,2), Dir.DOWN): 0.91,
				(GridWorldState(2,2), Dir.LEFT): 0.8,
				(GridWorldState(2,2), Dir.RIGHT): 0.9,

				# Optimal action is LEFT (by a hair), so (2,1) is not grouped with (1,1) under Pi*
				(GridWorldState(2,1), Dir.UP): 0.9,
				(GridWorldState(2,1), Dir.DOWN): 0.9,
				(GridWorldState(2,1), Dir.LEFT): 0.90000000001,
				(GridWorldState(2,1), Dir.RIGHT): 0.7,

				# Optimal action is UP, so (3,1) is grouped with (1,1) under Pi* despite the large value
				(GridWorldState(3,1), Dir.UP): 1000,
				(GridWorldState(3,1), Dir.DOWN): 0.89,
				(GridWorldState(3,1), Dir.LEFT): 0.89,
				(GridWorldState(3,1), Dir.RIGHT): 0.89}
	
	state_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
	print("(1,1), (1,2), and (3,1) should all get mapped together")
	print(state_abstr)
	'''

    # Testing that A* abstraction works
    '''
	# Create toy q_table to build abstraction from 
				# Optimal action/val is UP/0.9
	q_table = {(GridWorldState(1,1), Dir.UP): 0.9,
				(GridWorldState(1,1), Dir.DOWN): 0.8,
				(GridWorldState(1,1), Dir.LEFT): 0.7,
				(GridWorldState(1,1), Dir.RIGHT): 0.6,

				# Same optimal action and action value as (1,1)
				(GridWorldState(1,2), Dir.UP): 0.9,
				(GridWorldState(1,2), Dir.DOWN): 0.0,
				(GridWorldState(1,2), Dir.LEFT): 0.2,
				(GridWorldState(1,2), Dir.RIGHT): 0.5,

				# val(UP) = 0.9 but val(DOWN) = 0.91
				(GridWorldState(2,2), Dir.UP): 0.9,
				(GridWorldState(2,2), Dir.DOWN): 0.91,
				(GridWorldState(2,2), Dir.LEFT): 0.8,
				(GridWorldState(2,2), Dir.RIGHT): 0.9,

				# val(UP) = 0.89, max val
				(GridWorldState(2,1), Dir.UP): 0.89,
				(GridWorldState(2,1), Dir.DOWN): 0.88,
				(GridWorldState(2,1), Dir.LEFT): 0.8,
				(GridWorldState(2,1), Dir.RIGHT): 0.7,

				# val(UP) = 0.93, max val 
				(GridWorldState(3,1), Dir.UP): 0.93,
				(GridWorldState(3,1), Dir.DOWN): 0.89,
				(GridWorldState(3,1), Dir.LEFT): 0.89,
				(GridWorldState(3,1), Dir.RIGHT): 0.89}
	state_abstr = make_abstr(q_table, Abstr_type.A_STAR)
	print("Epsilon = 0. (1,1) and (1,2) should be mapped together")
	print(state_abstr)

	state_abstr = make_abstr(q_table, Abstr_type.A_STAR, epsilon=0.015)
	print("Epsilon = 0.015. (1,1), (1,2), and (2,1) should all be mapped together")
	print(state_abstr)

	state_abstr = make_abstr(q_table, Abstr_type.A_STAR, epsilon=0.031)
	print("Epsilon = 0.031. (1,1), (1,2), (2,1), (3,1) should all be mapped together")
	print(state_abstr)
	'''

    # Testing that Q* abstraction function works
    '''
	# Create toy q_table to build the abstraction from
	q_table = {(GridWorldState(1,1), Dir.UP): 1.0,
				(GridWorldState(1,1), Dir.DOWN): 2.5,
				(GridWorldState(1,1), Dir.LEFT): 2.3,
				(GridWorldState(1,1), Dir.RIGHT): 5.0,

				(GridWorldState(2,1), Dir.UP): 1.0,
				(GridWorldState(2,1), Dir.DOWN): 2.5,
				(GridWorldState(2,1), Dir.LEFT): 2.3,
				(GridWorldState(2,1), Dir.RIGHT): 5.05,

				(GridWorldState(2,2), Dir.UP): 1.1,
				(GridWorldState(2,2), Dir.DOWN): 2.4,
				(GridWorldState(2,2), Dir.LEFT): 2.3,
				(GridWorldState(2,2), Dir.RIGHT): 4.8,

				(GridWorldState(1,2), Dir.UP): 1.3,
				(GridWorldState(1,2), Dir.DOWN): 2.0,
				(GridWorldState(1,2), Dir.LEFT): 2.0,
				(GridWorldState(1,2), Dir.RIGHT): 4.8
				}
	state_abstr = make_abstr(q_table, Abstr_type.Q_STAR)
	print("Epsilon = 0. No shapes should be mapped together.")
	print(str(state_abstr))

	state_abstr = make_abstr(q_table, Abstr_type.Q_STAR, epsilon=0.3)
	print("Epsilon = 0.3. (1,1), (2,1), (2,2) should all be mapped together")
	print(str(state_abstr))

	state_abstr = make_abstr(q_table, Abstr_type.Q_STAR, epsilon=0.1)
	print("Epsilon = 0.1. (1,1), (2,1) should be mapped together. (2,2) should not.")
	print(str(state_abstr))

	state_abstr = make_abstr(q_table, Abstr_type.Q_STAR, epsilon=0.5)
	print("Epsilon = 0.5. (1,1), (2,1), (1,2), (2,2) should all be mapped together")
	print(str(state_abstr))
	'''

    # Testing Q-learning in abstract Four Rooms
    '''
	# Map all the states in the bottom-right room to the same abstract state 
	abstr_dict = {} 
	for i in range(6,12):
		for j in range(1,6):
			abstr_dict[GridWorldState(i,j)] = 'oneroom'

	state_abstr = StateAbstraction(abstr_dict)

	abstr_mdp = AbstractGridWorldMDP(height=11, 
										width=11,
										slip_prob=0.0,
										gamma=0.95,
										build_walls=True,
										state_abstr=state_abstr)
	agent = Agent(abstr_mdp)

	trajectory = [] 
	for i in range(100000):
		#print("At step", i)
		#print("parameters are", agent._alpha, agent.mdp.gamma)
		current_state, action, next_state, _ = agent.explore()
		#print("At", str(current_state), "took action", action, "got to", str(next_state))
		#print("Values learned for", str(current_state), "is")
		#print_action_values(agent.get_action_values(current_state))
		trajectory.append(current_state)
		#print()

	already_printed = [] 
	for state in trajectory:
		if state not in already_printed:
			print("values learned at state", state)
			print_action_values(agent.get_action_values(state))
			already_printed.append(state)

	agent.reset_to_init()
	for i in range(25):
		current_state, action, next_state = agent.apply_best_action()
		print('At', str(current_state), 'taking action', str(action), 'now at', str(next_state))
	'''

    # Testing Q-learning in toy abstract MDP
    '''
	# Simple abstraction in a grid where all states above the start-to-goal
	# diagonal are grouped together and all states below that diagonal
	# are grouped together 
	toy_abstr = StateAbstraction({GridWorldState(2,1): 'up', 
									GridWorldState(3,1): 'up',
									GridWorldState(3,2): 'up',
									GridWorldState(4,1): 'up',
									GridWorldState(4,2): 'up',
									GridWorldState(4,3): 'up',
									GridWorldState(5,1): 'up',
									GridWorldState(5,2): 'up',
									GridWorldState(5,3): 'up',
									GridWorldState(5,4): 'up',
									GridWorldState(1,2): 'right',
									GridWorldState(1,3): 'right',
									GridWorldState(1,4): 'right',
									GridWorldState(1,5): 'right',
									GridWorldState(2,3): 'right',
									GridWorldState(2,4): 'right',
									GridWorldState(2,5): 'right',
									GridWorldState(3,4): 'right',
									GridWorldState(3,5): 'right',
									GridWorldState(4,5): 'right'})
	#print("states covered by abstraction are", toy_abstr.abstr_dict.keys())
	

	abstr_mdp = AbstractGridWorldMDP(height=5, 
							width=5, 
							slip_prob=0.0, 
							gamma=0.95, 
							build_walls=False,
							state_abstr=toy_abstr)

	#print(abstr_mdp.state_abstr.get_abstr_from_ground(GridWorldState(1,1)))
	agent = Agent(abstr_mdp)
	
	trajectory = [] 
	for i in range(10000):
		#print("At step", i)
		#print("parameters are", agent._alpha, agent.mdp.gamma)
		current_state, action, next_state, _ = agent.explore()
		#print("At", str(current_state), "took action", action, "got to", str(next_state))
		#print("Values learned for", str(current_state), "is")
		#print_action_values(agent.get_action_values(current_state))
		trajectory.append(current_state)
		#print()

	already_printed = [] 
	for state in trajectory:
		if state not in already_printed:
			print("values learned at state", state)
			print_action_values(agent.get_action_values(state))
			already_printed.append(state)
	'''

    # Testing both epsilon-greedy and application of best learned
    # policy in ground MDP
    '''
	grid_mdp = GridWorldMDP(height=9, width=9, slip_prob=0.0, gamma=0.95, build_walls=True)

	agent = Agent(grid_mdp)
	#agent.set_current_state(GridWorldState(1,1))

	print(grid_mdp.goal_location)

	# Testing if epsilon-greedy policy works properly 
	trajectory = [] 
	for i in range(10000):
		#print("At step", i)
		#print("parameters are", agent._alpha, agent.mdp.gamma)
		current_state, action, next_state, _ = agent.explore()
		#print("At", str(current_state), "took action", action, "got to", str(next_state))
		#print("Values learned for", str(current_state), "is")
		#print_action_values(agent.get_action_values(current_state))
		trajectory.append(current_state)
		#print()

	#print("Went through the following states:")
	#for state in trajectory:
	#	print(str(state))
	already_printed = [] 
	for state in trajectory:
		if state not in already_printed:
			print("values learned at state", state)
			print_action_values(agent.get_action_values(state))
			already_printed.append(state)
	#print(grid_mdp.walls)

	agent.reset_to_init()

	for i in range(25):
		current_state, action, next_state = agent.apply_best_action()
		print('At', str(current_state), 'taking action', str(action), 'now at', str(next_state))
	'''

    # Testing a few trajectories to make sure the q-table updates
    # properly
    '''
	test_trajectory = [Dir.UP, Dir.RIGHT, Dir.UP, Dir.RIGHT]
	for i in range(5):
		apply_trajectory(agent, test_trajectory)
		agent.set_current_state(GridWorldState(9,9))

	test_trajectory = [Dir.RIGHT, Dir.RIGHT, Dir.UP, Dir.UP]
	apply_trajectory(agent, test_trajectory)
	agent.set_current_state(GridWorldState(9,9))

	test_trajectory = [Dir.UP, Dir.UP, Dir.RIGHT, Dir.RIGHT]
	apply_trajectory(agent, test_trajectory)
	'''

    # Testing motion, reward at goal state, and reset to
    # initial state at terminal state
    '''
	agent = Agent(grid_mdp, go_up_right)
	for i in range(30):
		agent.act()
	print(grid_mdp.walls)
	'''

    # Testing getter for best action/value given state
    '''
	agent = Agent(grid_mdp, go_right, alpha=0.5)
	current_state = agent.get_current_state() 
	test_action = Dir.UP

	# Set q_value for init_state, Dir.UP = 1.0
	agent._set_q_value(current_state, test_action, 1.0)

	# Should give Dir.UP, 1.0 
	print("should give (Dir.UP, 1.0)", agent.get_best_action_value_pair(current_state))

	# Go right by one 
	agent.act()
	print("Currently at", agent.get_current_state())
	# Should give random action with value = 0 
	print("Should give (random_action, 0.0)", agent.get_best_action_value_pair(agent.get_current_state()))
	# Update q-values of this state
	agent._set_q_value(agent.get_current_state(), Dir.UP, -1.0)
	agent._set_q_value(agent.get_current_state(), Dir.DOWN, -1.0)
	agent._set_q_value(agent.get_current_state(), Dir.LEFT, -1.0)
	agent._set_q_value(agent.get_current_state(), Dir.RIGHT, 0.1)
	# Should give Dir.RIGHT, 0.1
	print("Should give (Dir.RIGHT, 0.1)", agent.get_best_action_value_pair(agent.get_current_state()))

	print()
	# Checking that all values were updated properly
	for action in agent.mdp.actions:
		print("action:q-value = ", action, ":", agent.get_q_value(agent.get_current_state(), action))
	'''

    # Testing single instance of the act, update flow
    # Start agent at (10,11), go one right, get reward,
    # check that update happened
Code example #9
from GridWorld.GridWorldMDPClass import GridWorldMDP
from MDP.StateAbstractionClass import StateAbstraction
from MDP.AbstractMDPClass import AbstractMDP
from MDP.ValueIterationClass import ValueIteration
from resources.AbstractionTypes import Abstr_type
from resources.AbstractionCorrupters import make_corruption
from resources.AbstractionMakers import make_abstr

import numpy as np

# Number of states to corrupt
STATE_NUM = 20

# Create abstract MDP
mdp = GridWorldMDP()
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()
state_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
abstr_mdp = AbstractMDP(mdp, state_abstr)

# Randomly select our list of states and print them out
states_to_corrupt = np.random.choice(mdp.get_all_possible_states(),
                                     size=STATE_NUM,
                                     replace=False)
for state in states_to_corrupt:
    print(state)

# Create a corrupt MDP
corr_mdp = make_corruption(abstr_mdp, states_to_corrupt)
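
# A quick sanity check (a sketch; this assumes the corrupted MDP returned by
# make_corruption exposes a state_abstr attribute with get_abstr_from_ground,
# as AbstractMDP does): show where each selected state maps before and after
# the corruption.
for state in states_to_corrupt:
    print(state,
          'true ->', abstr_mdp.state_abstr.get_abstr_from_ground(state),
          'corrupt ->', corr_mdp.state_abstr.get_abstr_from_ground(state))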
Code example #10
def test_agent_abstraction():
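    """
    Train an Agent on GridWorld for EP_COUNT episodes, build a Pi* abstraction from
    its learned Q-table, then explore one more episode and compare its discounted
    return against a control agent that never abstracts. Returns both returns, the
    number of abstract-state entries in the Q-table, and the net reduction in the
    number of states.
    """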

    # Make agent and MDP
    mdp = GridWorldMDP()
    agent = Agent(mdp)

    # Run for some number of steps
    while agent._episode_counter < EP_COUNT:
        agent.explore()

    # Make a new abstraction based on the learned q-table
    num_abstr_states, num_reduced_ground_states = agent.make_abstraction(
        Abstr_type.PI_STAR, epsilon=THRESHOLD, ignore_zeroes=IGNORE_ZEROES)
    agent._epsilon = agent._epsilon / PARAM_CUT

    # Count the number of abstract states so we can see how much this is changing
    key_count = 0
    ground_key_count = 0
    for key in agent._q_table.keys():
        key_count += 1
        #if isinstance(key[0], TaxiState):
        if isinstance(key[0], GridWorldState):
            ground_key_count += 1
    #print(key_count - ground_key_count)

    # Print the state abstraction so we can see what's up
    #for key, value in agent.mdp.state_abstr.abstr_dict.items():
    #    print(key, value)

    #for key, value in agent._q_table.items():
    #    print(key[0], key[1], value)

    # Run for an episode to see what the reward is
    curr_state = agent.get_current_state()
    cumu_reward = 0
    discount = 1
    while not curr_state.is_terminal():
        state, action, next_state, reward = agent.explore()
        cumu_reward += reward * discount
        discount *= agent.mdp.gamma
        curr_state = next_state

    #print("Test", cumu_reward, end=' ')

    # Control agent, no abstraction performed
    mdp_control = GridWorldMDP()
    agent_control = Agent(mdp_control)

    # Train the agent for the same number of episodes
    curr_state = agent_control.mdp.get_current_state()
    while agent_control._episode_counter < EP_COUNT:
        agent_control.explore()

    # Run control agent for an episode to see what the reward is
    curr_state = agent_control.get_current_state()
    control_cumu_reward = 0
    discount = 1
    while not curr_state.is_terminal():
        state, action, next_state, reward = agent_control.explore()
        control_cumu_reward += reward * discount
        discount *= agent_control.mdp.gamma
        curr_state = next_state
    #print("Control", cumu_reward)
    #print("Delta", cumu_reward - control_cumu_reward)
    return cumu_reward, control_cumu_reward, key_count - ground_key_count, num_reduced_ground_states - num_abstr_states
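

# A minimal usage sketch (assuming EP_COUNT, THRESHOLD, IGNORE_ZEROES, and PARAM_CUT
# are defined at module level, as the test expects): run the test a few times and
# average the reward difference between the abstracting agent and the control agent.
if __name__ == '__main__':
    NUM_RUNS = 10  # hypothetical run count
    deltas = []
    for _ in range(NUM_RUNS):
        test_reward, control_reward, abstr_entries, state_reduction = test_agent_abstraction()
        deltas.append(test_reward - control_reward)
    print('Mean reward delta (abstraction - control):', sum(deltas) / len(deltas))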
Code example #11
    test_udm(mdp, Abstr_type.Q_STAR, EPISODE_COUNT, error_dict=mild_error_1)
    quit()
    '''

    # A-star with mild error 1
    '''
    test_udm(mdp, Abstr_type.A_STAR, EPISODE_COUNT, error_dict=mild_error_1)
    '''

    # Pi-star with mild error 1
    '''
    test_udm(mdp, Abstr_type.PI_STAR, EPISODE_COUNT, error_dict=mild_error_1)
    '''

    # Large MDP
    mdp = GridWorldMDP(goal_location=[(7, 11)])

    # Large bad error
    large_bad_error = {
        GridWorldState(2, 1): GridWorldState(8, 11),
        GridWorldState(1, 2): GridWorldState(7, 10)
    }

    # Test Q-Star with large MDP, bad error
    test_udm(mdp, Abstr_type.Q_STAR, EPISODE_COUNT, error_dict=large_bad_error)
    """
    # Run the split test 5 times on a true Q-star abstraction to see how much it splits
    for i in range(NUM_TESTS):
        mdp = TwoRoomsMDP(lower_width=3,
                          lower_height=3,
                          upper_width=3,
Code example #12
def iterate_detachment(mdp_key, batch_size=5000):
    """
    Load an incorrect abstraction. Train the model, generate a roll-out, detach the first cycle state. Repeat until
    the roll-out achieves a terminal state. Save the adjusted abstraction and learned policy. Visualize the original
    incorrect abstraction with roll-outs from original agents and the adjusted abstraction with a roll-out from the
    new agent
    :param mdp_key: key for the incorrect (poorly performing) abstraction
    :param batch_size: Number of steps to train between state detachments
    """
    # Load a poorly-performing abstraction
    names = ['AbstrType', 'AbstrEps', 'CorrType', 'CorrProp', 'Batch', 'Dict']
    df = pd.read_csv('../abstr_exp/corrupted/corrupted_abstractions.csv', names=names)
    abstr_string = df.loc[(df['AbstrType'] == str(mdp_key[0]))
                        & (df['AbstrEps'] == mdp_key[1])
                        & (df['CorrType'] == str(mdp_key[2]))
                        & (df['CorrProp'] == mdp_key[3])
                        & (df['Batch'] == mdp_key[4])]['Dict'].values[0]
    abstr_list = ast.literal_eval(abstr_string)
    abstr_dict = {}
    for el in abstr_list:
        is_term = el[0][0] == 11 and el[0][1] == 11
        state = GridWorldState(el[0][0], el[0][1], is_terminal=is_term)
        abstr_dict[state] = el[1]

    # Create an agent with this abstraction
    s_a = StateAbstraction(abstr_dict, abstr_type=Abstr_type.PI_STAR)
    mdp = GridWorldMDP()
    agent = AbstractionAgent(mdp, s_a=s_a)

    # Generate a roll-out from untrained model (should be random and short)
    rollout = agent.generate_rollout()
    print('Roll-out from untrained model')
    for state in rollout:
        print(state, end=', ')
    print()

    # Until roll-out leads to terminal state, explore and detach last state of roll-out. Record each of the detached
    #  states so they can be visualized later
    detached_states = []
    step_counter = 0
    while not rollout[-1].is_terminal():
        for i in range(batch_size):
            agent.explore()
        step_counter += batch_size
        rollout = agent.generate_rollout()
        print('Roll-out after', step_counter, 'steps')
        for state in rollout:
            print(state, end=', ')
        print()
        print('State Q-value pre-detach:')
        for action in agent.mdp.actions:
            print(rollout[-1], action, agent.get_q_value(rollout[-1], action))
        detach_flag = agent.detach_state(rollout[-1])
        if detach_flag == 0:
            print('Detaching state', rollout[-1])
            detached_states.append(rollout[-1])
        elif detach_flag == 1:
            print(rollout[-1], 'already a singleton state. No change.')
        print('State Q-value post-detach:')
        for action in agent.mdp.actions:
            print(rollout[-1], action, agent.get_q_value(rollout[-1], action))
        print()
    for key, value in agent.get_q_table().items():
        print(key, value)

    # Save resulting adapted state abstraction and learned policy
    s_a_file = open('../abstr_exp/adapted/adapted_abstraction.csv', 'w', newline='')
    s_a_writer = csv.writer(s_a_file)
    print(mdp_key)
    s_a_writer.writerow((mdp_key[0], mdp_key[1], mdp_key[2], mdp_key[3], mdp_key[4], agent.get_abstraction_as_string()))
    s_a_file.close()

    policy_file = open('../abstr_exp/adapted/learned_policy.csv', 'w', newline='')
    policy_writer = csv.writer(policy_file)
    policy_writer.writerow((mdp_key[0], mdp_key[1], mdp_key[2], mdp_key[3], mdp_key[4],
                            agent.get_learned_policy_as_string()))
    policy_file.close()

    # Visualize the adapted state abstraction and learned policy, along with the original for comparison
    viz = GridWorldVisualizer()
    surface = viz.create_corruption_visualization(mdp_key,
                                                  '../abstr_exp/adapted/adapted_abstraction.csv',
                                                  error_file='../abstr_exp/corrupted/error_states.csv')
    # Print the states that were detached (drawing circles over them on the surface is not implemented here)
    for state in detached_states:
        print(state, end=', ')
    #for d_state in
    viz.display_surface(surface)
Code example #13
from Experiment.ExperimentClass import Experiment
from GridWorld.GridWorldMDPClass import GridWorldMDP
from GridWorld.GridWorldStateClass import GridWorldState
from GridWorld.TaxiMDPClass import TaxiMDP
from GridWorld.LargeTaxiMDPClass import LargeTaxiMDP
from GridWorld.TwoRoomsMDP import TwoRoomsMDP
from Agent.AgentClass import Agent
from resources.AbstractionTypes import Abstr_type
from resources.AbstractionCorrupters import *
from util import *
from Visualizer.QValueVisualizer import QValueVisualizer
import scipy.stats

# MDP details
#MDP = GridWorldMDP()
MDP = TaxiMDP(same_goal=True)
#MDP = LargeTaxiMDP(same_goal=True, gamma=0.9)
mdp_sum = 'Taxi MDP'
'''
MDP = TwoRoomsMDP(upper_height=3,
                  upper_width=3,
                  lower_height=3,
                  lower_width=3,
                  hallway_states=[3], goal_location=[(1,5)])

MDP = TwoRoomsMDP(lower_width=1,
                  lower_height=1,
                  hallway_states=[1],
                  upper_height=0,
                  upper_width=0,
Code example #14
        if dct[key] != 0.0:
            nonzero_count += 1
    return nonzero_count


def print_q_table(q_table):
    for key in q_table:
        print(key[0], key[1], q_table[key])


if __name__ == '__main__':

    # GridWorld

    # Make ground MDP
    mdp = GridWorldMDP(slip_prob=0.0)
    # Run VI to get q-table
    vi = ValueIteration(mdp)
    vi.run_value_iteration()
    q_table = vi.get_q_table()
    # Make state abstractions
    q_star_abstr = make_abstr(q_table, Abstr_type.Q_STAR)
    a_star_abstr = make_abstr(q_table, Abstr_type.A_STAR)
    pi_star_abstr = make_abstr(q_table, Abstr_type.PI_STAR)
    # Make abstract MDPs - NOTE THIS CLASS HAS BEEN DEPRECATED DO NOT USE
    q_mdp = AbstractGridWorldMDP(state_abstr=q_star_abstr)
    a_mdp = AbstractGridWorldMDP(state_abstr=a_star_abstr)
    pi_mdp = AbstractGridWorldMDP(state_abstr=pi_star_abstr)

    # This is the type of abstract MDP to use instead of the deprecated class above
    q2_mdp = AbstractMDP(mdp, state_abstr=q_star_abstr)
Code example #15
                best_action_intersect = list(
                    set(best_actions_1) & set(best_actions_2))
                if len(best_action_intersect) == 0:
                    return False
    return True


def print_policy(policy):
    '''
    Print the policy
    '''
    for key in policy.keys():
        print(key, policy[key])


if __name__ == '__main__':
    # Test that the optimal ground policy for FourRooms is representable in the
    # abstraction given by A*

    # Get optimal ground policy for FourRooms
    four_rooms = GridWorldMDP(slip_prob=0.0, gamma=0.99)
    vi = ValueIteration(four_rooms)
    vi.run_value_iteration()
    optimal_policy = vi.get_optimal_policy()
    #print_policy(optimal_policy)

    # Get A* abstraction for FourRooms and optimal abstract policy
    abstr = make_abstr(vi.get_q_table(), Abstr_type.A_STAR)

    print(is_optimal_policy_representable(vi, optimal_policy, abstr))
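
    # The same check could be run for the other abstraction types (a sketch using
    # the same helpers and the Q_STAR / PI_STAR members of Abstr_type):
    q_abstr = make_abstr(vi.get_q_table(), Abstr_type.Q_STAR)
    print('Q*:', is_optimal_policy_representable(vi, optimal_policy, q_abstr))
    pi_abstr = make_abstr(vi.get_q_table(), Abstr_type.PI_STAR)
    print('Pi*:', is_optimal_policy_representable(vi, optimal_policy, pi_abstr))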
Code example #16
"""
Test the error visualizer from QValueVisualizer
"""

from Visualizer.QValueVisualizer import QValueVisualizer
from GridWorld.GridWorldMDPClass import GridWorldMDP

if __name__ == '__main__':
    mdp = GridWorldMDP()
    v = QValueVisualizer(results_dir='../exp_output/big_test',
                         states_to_track=mdp.get_all_possible_states())
    v.visualize_q_value_error('noisy',
                              mdp,
                              episodes=[i for i in range(50, 1000, 50)])
Code example #17
    # Create environment
    mdp = TwoRoomsMDP(lower_width=3,
                      upper_width=3,
                      lower_height=3,
                      upper_height=3,
                      hallway_states=[3],
                      goal_location=[(1, 5)])
    error_dict = {
        GridWorldState(1, 2): GridWorldState(2, 5),
        GridWorldState(3, 3): GridWorldState(1, 6)
    }

    ABSTR_TYPE = Abstr_type.Q_STAR
    ERROR_NUM = 6

    mdp = GridWorldMDP()
    if ABSTR_TYPE == Abstr_type.Q_STAR:
        abstr_mdp = mdp.make_abstr_mdp(Abstr_type.Q_STAR)
        if ERROR_NUM == 1:
            error_dict = {
                GridWorldState(6, 3): GridWorldState(10, 9),
                GridWorldState(9, 10): GridWorldState(9, 3)
            }
        elif ERROR_NUM == 2:
            error_dict = {
                GridWorldState(9, 8): GridWorldState(2, 1),
                GridWorldState(9, 11): GridWorldState(2, 4)
            }
        # Lower right room all grouped together
        elif ERROR_NUM == 3:
            error_dict = {
Code example #18
# #         print("epsilon, alpha:", agent._epsilon, agent._alpha)
# #     current_state, action, next_state, _ = agent.explore()
# # state_abstr = make_abstr(agent.get_q_table(), Abstr_type.Q_STAR, epsilon=0.05)
# # abstr_grid_mdp = AbstractGridWorldMDP(state_abstr=state_abstr)
# # abs_agent = Agent(abstr_grid_mdp)
# # abs_g_viz = AbstractGridWorldVisualizer(abstr_grid_mdp,abs_agent)
# # #abs_g_viz.displayAbstractMDP()
# # for i in range(100000):
# #     if i % 1000 == 0:
# #         print("epsilon, alpha:", abs_agent._epsilon, abs_agent._alpha)
# #     current_state, action, next_state,_  = abs_agent.explore()
# #
# # abs_g_viz.visualizeLearnedPolicy()

#Q-STAR - USING VI
mdp = GridWorldMDP(slip_prob=0, gamma=0.99)
vi = ValueIteration(mdp)
vi.run_value_iteration()
q_table = vi.get_q_table()
q_star_abstr = make_abstr(q_table, Abstr_type.Q_STAR, epsilon=0.01)
abstr_grid_mdp = AbstractGridWorldMDP(state_abstr=q_star_abstr)
abs_agent = Agent(abstr_grid_mdp)
abs_g_viz = AbstractGridWorldVisualizer(abstr_grid_mdp, abs_agent)
#abs_g_viz.displayAbstractMDP()
for i in range(100000):
    if i % 1000 == 0:
        print("epsilon, alpha:", abs_agent._epsilon, abs_agent._alpha)
    current_state, action, next_state, _ = abs_agent.explore()

abs_g_viz.visualizeLearnedPolicy()
'''