def obstacles_room(plotV=True, num_sample=100, computeV=False):
    """Build the obstacle-room maze and optionally compute and plot its values.

    A ``domains.GridMazeDomain`` is created with height 10, width 10, the
    reward at state 18, fourteen obstacle states and a list of wall states,
    and is then wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: When true, and the value function was computed, draw it on the
            domain graph and save it to ``graphs/obstacleRoom_trueV.pdf``.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: When true, run ``value_iteration`` on the domain graph.

    Returns:
        A ``(maze, V)`` tuple. ``V`` is ``None`` unless ``computeV`` is true.
    """
    height = 10
    width = 10
    reward_location = 18
    initial_state = None
    obstacles_location = [
        12, 13, 22, 23, 35, 36, 45, 46, 62, 63, 72, 73, 67, 77,
    ]
    # Same indices, order and duplicates as the literal list this replaces.
    # Presumably states are numbered row by row, in which case these are the
    # four outer edges of the 10x10 grid -- TODO confirm against the domain.
    walls_location = (
        list(range(10))
        + list(range(10, 100, 10))
        + list(range(9, 100, 10))
        + list(range(90, 100))
    )
    obstacles_transition_probability = .2

    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)
        # Plot only when there is a value function to draw: with the default
        # arguments V is None and there is no signal to plot.
        if plotV:
            _fig, ax = plt.subplots(1, 1)
            maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
            plt.savefig('graphs/obstacleRoom_trueV.pdf')
            plt.close()
    return maze, V
def oneroom(plotV=True, num_sample=100, computeV=False):
    """Build the empty one-room maze and optionally compute and plot its values.

    A ``domains.GridMazeDomain`` is created with height 10, width 10, the
    reward at state 9 and no walls or obstacles, and is then wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: When true, and the value function was computed, draw it on the
            domain graph and save it to ``plots/one_room_trueV.pdf``.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: When true, run ``value_iteration`` on the domain graph.

    Returns:
        A ``(maze, V)`` tuple. ``V`` is ``None`` unless ``computeV`` is true.
    """
    height = 10
    width = 10
    reward_location = 9
    initial_state = None
    obstacles_location = []
    walls_location = []
    obstacles_transition_probability = .2

    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)
        # Plot only when there is a value function to draw: with the default
        # arguments V is None and there is no signal to plot.
        if plotV:
            _fig, ax = plt.subplots(1, 1)
            maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
            plt.savefig('plots/one_room_trueV.pdf')
            plt.close()
    return maze, V
def example_grid_maze(plotV=True):
    """Build the example grid maze, compute its value function and plot it.

    A ``LearningMazeDomain`` is created directly from the grid description
    (height 10, width 10, reward at state 9, eight obstacle states, thirteen
    wall states) with ``num_sample=2000``. The value function is always
    computed with the local ``value_iteration`` helper.

    Args:
        plotV: When true, draw the value function on the domain graph and
            save it to ``graphs/simpleMaze_trueV.pdf``.

    Returns:
        A ``(maze, V)`` tuple, where ``V`` is a list with one value per graph
        vertex.
    """
    height = 10
    width = 10
    reward_location = 9
    initial_state = None
    obstacles_location = [14, 13, 24, 23, 29, 28, 39, 38]
    walls_location = [50, 51, 52, 53, 54, 55, 56, 74, 75, 76, 77, 78, 79]
    obstacles_transition_probability = .2
    maze = LearningMazeDomain(height, width, reward_location, walls_location,
                              obstacles_location, initial_state,
                              obstacles_transition_probability,
                              num_sample=2000)

    def value_iteration(G, finish_state, obstacles, walls):
        """Run in-place value iteration over the vertices of graph ``G``.

        The reward is 100 at ``finish_state`` and 0 elsewhere, and the
        discount is 0.9. Each state's backup is scaled by a per-state factor:
        1 by default, ``obstacles_transition_probability`` for obstacle
        states and 0 for wall states. Sweeps repeat until the summed absolute
        change over one sweep is at most 1e-4.
        """
        n_states = G.N
        V = [0] * n_states
        R = [0] * n_states
        R[finish_state] = 100
        gamma = 0.9

        success_prob = [1] * n_states
        for i in obstacles:
            success_prob[i] = obstacles_transition_probability
        for i in walls:
            success_prob[i] = .0

        # The adjacency structure does not change between sweeps, so look up
        # each state's successors once instead of on every sweep.
        successors = [G.W.getcol(s).nonzero()[0] for s in range(n_states)]

        epsilon = .0001
        diff = 100
        iterations = 0
        while diff > epsilon:
            iterations += 1
            diff = 0
            for s in range(n_states):
                if s == finish_state:
                    # NOTE(review): the goal is treated as terminal here; its
                    # value is not backed up from its neighbours. Confirm
                    # this matches the intended loop layout.
                    max_a = success_prob[s] * R[s]
                else:
                    max_a = float('-inf')
                    for s_prime in successors[s]:
                        new_v = success_prob[s] * (R[s] + gamma * V[s_prime])
                        if new_v > max_a:
                            max_a = new_v
                diff = diff + abs(V[s] - max_a)
                # Updated in place, so later states in this sweep already see
                # the new value.
                V[s] = max_a
        # Single-argument print calls behave the same on Python 2 and 3.
        print("number of iterations in Value Iteration:")
        print(iterations)
        return V

    V = value_iteration(maze.domain.graph, reward_location,
                        obstacles_location, walls_location)
    if plotV:
        _fig, ax = plt.subplots(1, 1)
        maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
        plt.savefig('graphs/simpleMaze_trueV.pdf')
        plt.close()
    return maze, V
def main():
    """Run the proto-value-basis experiment for every configured setting.

    Iterates over every combination of ``DISCOUNT``, ``DIMENSION`` and
    ``GRID_SIZES``. For each one it builds a square ``LearningMazeDomain``
    with no walls or obstacles and the reward at state ``grid_size - 1``,
    then performs ten learn-and-simulate runs and hands the collected
    results to ``plot_results``.
    """
    for discount in DISCOUNT:
        for dimension in DIMENSION:
            for grid_size in GRID_SIZES:
                size_text = str(grid_size)
                print('>>>>>>>>>>>>>>>>>>>>>>>>>> Simulation grid of size : '
                      + size_text + 'x' + size_text)
                print('>>>>>>>>>>>>>>>>>>>>>>>>>> dimension basis function : '
                      + str(dimension))
                print('>>>>>>>>>>>>>>>>>>>>>>>>>> discount factor : '
                      + str(discount))

                height = width = grid_size
                num_states = grid_size * grid_size
                reward_location = grid_size - 1
                obstacles_location = []
                walls_location = []
                maze = LearningMazeDomain(height, width, reward_location,
                                          walls_location, obstacles_location,
                                          num_sample=num_samples)

                pvf_all_results = {}
                for run in range(10):
                    # Only the learned policy and the learning distances are
                    # used; the other two returned values are discarded.
                    _steps, policy, _samples, distances = \
                        maze.learn_proto_values_basis(num_basis=dimension,
                                                      explore=0,
                                                      discount=discount,
                                                      max_steps=500,
                                                      max_iterations=200)
                    steps_to_goal, samples, cumulative_rewards = simulate(
                        num_states, reward_location, walls_location, maze,
                        policy)
                    pvf_all_results[run] = dict(
                        steps_to_goal=steps_to_goal,
                        samples=samples,
                        cumul_rewards=cumulative_rewards,
                        learning_distances=distances,
                    )

                plot_results(pvf_all_results, grid_size, reward_location,
                             dimension, discount, num_samples)
def low_stretch_tree_maze(num_sample=100, length_sample=100):
    """Return a learning maze built on a ``domains.SymmetricMazeDomain``.

    The domain is created with a single reward location (state 15) and no
    obstacles.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    symmetric_domain = domains.SymmetricMazeDomain(rewards_locations=[15],
                                                   obstacles_location=[])
    return LearningMazeDomain(domain=symmetric_domain,
                              num_sample=num_sample,
                              length_sample=length_sample)
def tworooms_nowalls(num_sample=3000):
    """Return a 10x10 learning maze with one partition and no outer walls.

    The maze is built directly by ``LearningMazeDomain`` with the reward at
    state 18, no obstacles, and wall states 40-49 except state 45.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    side = 10
    goal = 18
    start = None
    obstacle_prob = .2
    no_obstacles = []
    # States 40..49 with state 45 left out.
    partition = [cell for cell in range(40, 50) if cell != 45]
    return LearningMazeDomain(side, side, goal, partition, no_obstacles,
                              start, obstacle_prob, num_sample=num_sample)
def example_grid_maze():
    """Return the example 10x10 grid maze wrapped for learning.

    A ``domains.GridMazeDomain`` is created with the reward at state 9,
    eight obstacle states and thirteen wall states, then wrapped in a
    ``LearningMazeDomain`` with ``num_sample=2000``.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    side = 10
    goal = 9
    start = None
    obstacle_prob = .2
    obstacle_cells = [14, 13, 24, 23, 29, 28, 39, 38]
    wall_cells = [50, 51, 52, 53, 54, 55, 56, 74, 75, 76, 77, 78, 79]

    grid = domains.GridMazeDomain(side, side, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=2000)
def oneroom(plotV=True, num_sample=100, length_sample=100, computeV=False):
    """Return an empty 10x10 one-room learning maze.

    A ``domains.GridMazeDomain`` is created with the reward at state 9 and
    no walls or obstacles, then wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: Accepted but not used by this function.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        computeV: Accepted but not used by this function.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    side = 10
    goal = 9
    start = None
    obstacle_prob = .2
    wall_cells = []
    obstacle_cells = []

    grid = domains.GridMazeDomain(side, side, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample)
def tworooms(num_sample=3000):
    """Return a 10x10 two-room learning maze.

    The maze is built directly by ``LearningMazeDomain`` with the reward at
    state 18, no obstacles, and a wall list made of four index runs plus
    states 41-49 except state 45.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    side = 10
    goal = 18
    start = None
    obstacle_prob = .2
    no_obstacles = []

    # Same indices, order and duplicates as the literal list this replaces.
    # Presumably states are numbered row by row, making the first four runs
    # the outer edges of the grid -- TODO confirm against the domain.
    wall_cells = list(range(10))
    wall_cells += list(range(10, 100, 10))
    wall_cells += list(range(9, 100, 10))
    wall_cells += list(range(90, 100))
    # States 41..49 with state 45 left out.
    wall_cells += [cell for cell in range(41, 50) if cell != 45]

    return LearningMazeDomain(side, side, goal, wall_cells, no_obstacles,
                              start, obstacle_prob, num_sample=num_sample)
def obstacles_room(plotV=True, num_sample=100, length_sample=100,
                   computeV=False):
    """Return the 10x10 obstacle-room learning maze.

    A ``domains.GridMazeDomain`` is created with the reward at state 18,
    fourteen obstacle states and a list of wall states, then wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: Accepted but not used by this function.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        computeV: Accepted but not used by this function.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    side = 10
    goal = 18
    start = None
    obstacle_prob = .2
    obstacle_cells = [12, 13, 22, 23, 35, 36, 45, 46, 62, 63, 72, 73, 67, 77]

    # Same indices, order and duplicates as the literal list this replaces.
    # Presumably states are numbered row by row, making these the outer
    # edges of the grid -- TODO confirm against the domain.
    wall_cells = []
    for run in (range(10), range(10, 100, 10), range(9, 100, 10),
                range(90, 100)):
        wall_cells.extend(run)

    grid = domains.GridMazeDomain(side, side, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample)
def low_stretch_tree_maze(plotV=True, num_sample=100, computeV=False):
    """Build the symmetric maze and optionally compute and plot its values.

    A ``domains.SymmetricMazeDomain`` is created with a single reward
    location (state 15) and no obstacles, and is then wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: When true, and the value function was computed, draw it on the
            domain graph and save it to ``graphs/lowStretchTree_trueV.pdf``.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: When true, run ``value_iteration`` on the domain graph with
            an empty wall list.

    Returns:
        A ``(maze, V)`` tuple. ``V`` is ``None`` unless ``computeV`` is true.
    """
    reward_location = [15]
    obstacles_location = []
    obstacles_transition_probability = .2

    domain = domains.SymmetricMazeDomain(rewards_locations=reward_location,
                                         obstacles_location=obstacles_location)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        # value_iteration takes one goal state, so pass the only entry.
        V = value_iteration(maze.domain.graph, reward_location[0],
                            obstacles_location, [],
                            obstacles_transition_probability)
        # Plot only when there is a value function to draw: with the default
        # arguments V is None and there is no signal to plot.
        if plotV:
            _fig, ax = plt.subplots(1, 1)
            maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
            plt.savefig('graphs/lowStretchTree_trueV.pdf')
            plt.close()
    return maze, V
def threerooms(num_sample=5000, length_sample=100):
    """Return the three-room learning maze (height 50, width 100).

    A ``domains.GridMazeDomain`` is created with the reward at state 198, no
    obstacles and a wall list assembled from eight index ranges, then wrapped
    in a ``LearningMazeDomain``.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.

    Returns:
        The constructed ``LearningMazeDomain``.
    """
    rows = 50
    cols = 100
    goal = 198
    start = None
    obstacle_prob = .2
    obstacle_cells = []

    # (start, stop, step) for each run of wall states, in the original order.
    # Presumably states are numbered row by row: the first four runs would
    # then be the outer edges and the last four two partitions, each with a
    # ten-state gap -- TODO confirm against the domain.
    wall_runs = [
        (0, 100, 1),
        (4900, 5000, 1),
        (0, 5000, 100),
        (99, 5000, 100),
        (1600, 1670, 1),
        (1680, 1700, 1),
        (3200, 3220, 1),
        (3230, 3300, 1),
    ]
    wall_cells = [cell
                  for first, stop, step in wall_runs
                  for cell in range(first, stop, step)]

    grid = domains.GridMazeDomain(rows, cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample)
def threerooms(plotV=True, num_sample=5000, computeV=False):
    """Build the three-room maze and optionally compute and plot its values.

    A ``domains.GridMazeDomain`` is created with height 50, width 100, the
    reward at state 198, no obstacles and a wall list assembled from eight
    index ranges, and is then wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: When true, and the value function was computed, draw it on the
            domain graph and save it to ``graphs/threeRooms_trueV.pdf``.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: When true, run ``value_iteration`` on the domain graph.

    Returns:
        A ``(maze, V)`` tuple. ``V`` is ``None`` unless ``computeV`` is true.
    """
    height = 50
    width = 100
    reward_location = 198
    initial_state = None
    obstacles_location = []
    # Same indices, order and duplicates as the successive extend() calls
    # this replaces. Presumably states are numbered row by row: the first
    # four runs would then be the outer edges and the last four two
    # partitions, each with a ten-state gap -- TODO confirm.
    walls_location = (
        list(range(100))
        + list(range(4900, 5000))
        + list(range(0, 5000, 100))
        + list(range(99, 5000, 100))
        + list(range(1600, 1670))
        + list(range(1680, 1700))
        + list(range(3200, 3220))
        + list(range(3230, 3300))
    )
    obstacles_transition_probability = .2

    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)
        # Plot only when there is a value function to draw: with the default
        # arguments V is None and there is no signal to plot.
        if plotV:
            _fig, ax = plt.subplots(1, 1)
            maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
            plt.savefig('graphs/threeRooms_trueV.pdf')
            plt.close()
    return maze, V
def example_grid_maze(plotV=True):
    """Build the example grid maze, compute its value function and plot it.

    A ``domains.GridMazeDomain`` is created with height 10, width 10, the
    reward at state 9, eight obstacle states and thirteen wall states, then
    wrapped in a ``LearningMazeDomain`` with ``num_sample=2000``. The value
    function is always computed with ``value_iteration``.

    Args:
        plotV: When true, draw the value function on the domain graph and
            save it to ``graphs/simpleMaze_trueV.pdf``.

    Returns:
        A ``(maze, V)`` tuple.
    """
    side = 10
    goal = 9
    start = None
    obstacle_prob = .2
    obstacle_cells = [14, 13, 24, 23, 29, 28, 39, 38]
    wall_cells = [50, 51, 52, 53, 54, 55, 56, 74, 75, 76, 77, 78, 79]

    grid = domains.GridMazeDomain(side, side, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    maze = LearningMazeDomain(domain=grid, num_sample=2000)

    graph = maze.domain.graph
    V = value_iteration(graph, goal, obstacle_cells, wall_cells,
                        obstacle_prob)

    if not plotV:
        return maze, V

    _fig, axes = plt.subplots(1, 1)
    maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=axes)
    plt.savefig('graphs/simpleMaze_trueV.pdf')
    plt.close()
    return maze, V