def obstacles_room(plotV=True, num_sample=100, computeV=False):
    """Build the 10x10 obstacle-room maze and optionally its value function.

    A ``GridMazeDomain`` is created with reward state 18, the wall and
    obstacle index lists below, and an obstacle transition probability of
    0.2, then wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: If true *and* ``computeV`` is true, plot the computed value
            function on the domain graph and save it to
            ``graphs/obstacleRoom_trueV.pdf``. The ``graphs/`` directory is
            not created here.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph.

    Returns:
        Tuple ``(maze, V)``; ``V`` is ``None`` when ``computeV`` is false.
    """
    height = 10
    width = 10
    reward_location = 18
    initial_state = None  # alternative noted in the original: np.array([25])
    obstacles_location = [
        12, 13, 22, 23, 35, 36, 45, 46, 62, 63, 72, 73, 67, 77,
    ]
    # NOTE(review): looks like the outer ring of the grid under row-major
    # numbering -- confirm against GridMazeDomain. Duplicates (9, 90, 99)
    # are kept exactly as in the original list.
    walls_location = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
        10, 20, 30, 40, 50, 60, 70, 80, 90,
        9, 19, 29, 39, 49, 59, 69, 79, 89, 99,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
    ]
    obstacles_transition_probability = .2

    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)
        # Plotting lives inside the computeV branch: plotV defaults to True
        # while computeV defaults to False, so there may be no V to plot.
        if plotV:
            fig, ax = plt.subplots(1, 1)
            try:
                maze.domain.graph.plot_signal(np.array(V), vertex_size=60,
                                              ax=ax)
                plt.savefig('graphs/obstacleRoom_trueV.pdf')
            finally:
                # Release the figure even if plotting or saving fails.
                plt.close(fig)
    return maze, V
def oneroom(plotV=True, num_sample=100, computeV=False):
    """Build the empty 10x10 single-room maze and optionally its value function.

    A ``GridMazeDomain`` is created with reward state 9, no walls, no
    obstacles, and an obstacle transition probability of 0.2, then wrapped
    in a ``LearningMazeDomain``.

    Args:
        plotV: If true *and* ``computeV`` is true, plot the computed value
            function on the domain graph and save it to
            ``plots/one_room_trueV.pdf``. The ``plots/`` directory is not
            created here.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph.

    Returns:
        Tuple ``(maze, V)``; ``V`` is ``None`` when ``computeV`` is false.
    """
    height = 10
    width = 10
    reward_location = 9
    initial_state = None  # alternative noted in the original: np.array([25])
    obstacles_location = []  # original note: range(height*width)
    walls_location = []
    obstacles_transition_probability = .2

    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)
        # Plotting lives inside the computeV branch: plotV defaults to True
        # while computeV defaults to False, so there may be no V to plot.
        if plotV:
            fig, ax = plt.subplots(1, 1)
            try:
                maze.domain.graph.plot_signal(np.array(V), vertex_size=60,
                                              ax=ax)
                plt.savefig('plots/one_room_trueV.pdf')
            finally:
                # Release the figure even if plotting or saving fails.
                plt.close(fig)
    return maze, V
def update_domain(self, new_reward_location, obstacles_location, num_samples,
                  length_sample):
    """Swap in a new reward/obstacle layout, rebuild the domain and resample.

    Stores the four arguments on the instance, constructs a fresh
    ``GridMazeDomain`` from the stored geometry (``height``, ``width``,
    ``walls_location``, ``initial_state``,
    ``obstacles_transition_probability``) together with the new reward and
    obstacle locations, and finally calls ``self.compute_samples()``.

    Args:
        new_reward_location: Stored as ``self.reward_location`` and passed
            to ``GridMazeDomain``.
        obstacles_location: Stored as ``self.obstacles_location`` and passed
            to ``GridMazeDomain``.
        num_samples: Stored as ``self.num_samples``.
        length_sample: Stored as ``self.length_sample``.
    """
    self.reward_location, self.obstacles_location = (
        new_reward_location, obstacles_location)
    self.num_samples, self.length_sample = num_samples, length_sample

    # Positional order matches the GridMazeDomain call in the original code.
    domain_args = (
        self.height,
        self.width,
        new_reward_location,
        self.walls_location,
        self.obstacles_location,
        self.initial_state,
        self.obstacles_transition_probability,
    )
    self.domain = domains.GridMazeDomain(*domain_args)
    self.compute_samples()
def example_grid_maze():
    """Return the example 10x10 grid maze wrapped for learning.

    The domain uses reward state 9, eight obstacle states, two runs of wall
    states (50-56 and 74-79), no fixed initial state, and an obstacle
    transition probability of 0.2. The ``LearningMazeDomain`` is created
    with ``num_sample=2000``.

    Returns:
        The ``LearningMazeDomain`` instance.
    """
    n_rows, n_cols = 10, 10
    goal = 9
    start = None  # alternative noted in the original: np.array([25])
    obstacle_cells = [14, 13, 24, 23, 29, 28, 39, 38]
    # Same values, same order as the literal list in the original.
    wall_cells = list(range(50, 57)) + list(range(74, 80))
    obstacle_prob = .2

    grid = domains.GridMazeDomain(n_rows, n_cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=2000)
def oneroom(plotV=True, num_sample=100, length_sample=100, computeV=False):
    """Return the empty 10x10 single-room maze wrapped for learning.

    The domain has reward state 9, no walls, no obstacles, no fixed initial
    state, and an obstacle transition probability of 0.2.

    Args:
        plotV: Accepted but not used by this function.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        computeV: Accepted but not used by this function.

    Returns:
        The ``LearningMazeDomain`` instance.
    """
    n_rows, n_cols = 10, 10
    goal = 9
    start = None  # alternative noted in the original: np.array([25])
    obstacle_cells = []  # original note: range(height*width)
    wall_cells = []
    obstacle_prob = .2

    grid = domains.GridMazeDomain(n_rows, n_cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample)
def tworooms(num_sample=100, length_sample=100, discount=0.9):
    """Return the 10x10 two-room maze wrapped for learning.

    The domain has reward state 18, no obstacles, no fixed initial state,
    and an obstacle transition probability of 0.2. Walls are the index runs
    listed below plus states 41-44 and 46-49 (45 is not a wall).

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        discount: Forwarded to ``LearningMazeDomain`` as ``discount``.

    Returns:
        The ``LearningMazeDomain`` instance.
    """
    n_rows, n_cols = 10, 10
    goal = 18
    start = None  # alternative noted in the original: np.array([25])
    obstacle_cells = []  # original note: range(height*width)

    # Reproduces the original literal list element-for-element, including
    # its repeated entries (9, 90, 99 and 49).
    # NOTE(review): looks like the outer ring plus a dividing wall under
    # row-major numbering -- confirm against GridMazeDomain.
    wall_cells = (
        list(range(0, 10))
        + list(range(10, 100, 10))
        + list(range(9, 100, 10))
        + list(range(90, 100))
        + [41, 42, 43, 44, 46, 47, 48, 49]
    )
    obstacle_prob = .2

    grid = domains.GridMazeDomain(n_rows, n_cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample, discount=discount)
def obstacles_room(plotV=True, num_sample=100, length_sample=100,
                   computeV=False):
    """Return the 10x10 obstacle-room maze wrapped for learning.

    The domain has reward state 18, fourteen obstacle states, the wall
    index runs listed below, no fixed initial state, and an obstacle
    transition probability of 0.2.

    Args:
        plotV: Accepted but not used by this function.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        computeV: Accepted but not used by this function.

    Returns:
        The ``LearningMazeDomain`` instance.
    """
    n_rows, n_cols = 10, 10
    goal = 18
    start = None  # alternative noted in the original: np.array([25])
    obstacle_cells = [12, 13, 22, 23, 35, 36, 45, 46, 62, 63, 72, 73, 67, 77]

    # Reproduces the original literal list element-for-element, including
    # its repeated entries (9, 90, 99).
    # NOTE(review): looks like the outer ring of the grid under row-major
    # numbering -- confirm against GridMazeDomain.
    wall_cells = (
        list(range(0, 10))
        + list(range(10, 100, 10))
        + list(range(9, 100, 10))
        + list(range(90, 100))
    )
    obstacle_prob = .2

    grid = domains.GridMazeDomain(n_rows, n_cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample)
def threerooms(num_sample=5000, length_sample=100):
    """Return the 50x100 three-room maze wrapped for learning.

    The domain has height 50, width 100, reward state 198, no obstacles, no
    fixed initial state, and an obstacle transition probability of 0.2.
    Walls are assembled from the index ranges listed below.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.

    Returns:
        The ``LearningMazeDomain`` instance.
    """
    n_rows, n_cols = 50, 100
    goal = 198
    start = None  # alternative noted in the original: np.array([25])
    obstacle_cells = []  # original note: range(height*width)

    # (start, stop, step) triples, in the same order the original extended
    # the list. NOTE(review): looks like the outer border plus two interior
    # walls with gaps (1670-1679 and 3220-3229) -- confirm the layout.
    wall_spans = [
        (0, 100, 1),
        (4900, 5000, 1),
        (0, 5000, 100),
        (99, 5000, 100),
        (1600, 1670, 1),
        (1680, 1700, 1),
        (3200, 3220, 1),
        (3230, 3300, 1),
    ]
    wall_cells = []
    for span in wall_spans:
        wall_cells.extend(range(*span))
    obstacle_prob = .2

    grid = domains.GridMazeDomain(n_rows, n_cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    return LearningMazeDomain(domain=grid, num_sample=num_sample,
                              length_sample=length_sample)
def threerooms(plotV=True, num_sample=5000, computeV=False):
    """Build the 50x100 three-room maze and optionally its value function.

    A ``GridMazeDomain`` is created with height 50, width 100, reward state
    198, no obstacles, the wall index ranges below, and an obstacle
    transition probability of 0.2, then wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: If true *and* ``computeV`` is true, plot the computed value
            function on the domain graph and save it to
            ``graphs/threeRooms_trueV.pdf``. The ``graphs/`` directory is
            not created here.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph.

    Returns:
        Tuple ``(maze, V)``; ``V`` is ``None`` when ``computeV`` is false.
    """
    height = 50
    width = 100
    reward_location = 198
    initial_state = None  # alternative noted in the original: np.array([25])
    obstacles_location = []  # original note: range(height*width)

    # NOTE(review): looks like the outer border plus two interior walls with
    # gaps (1670-1679 and 3220-3229) -- confirm against GridMazeDomain.
    walls_location = []
    walls_location.extend(range(100))
    walls_location.extend(range(4900, 5000))
    walls_location.extend(range(0, 5000, 100))
    walls_location.extend(range(99, 5000, 100))
    walls_location.extend(range(1600, 1670))
    walls_location.extend(range(1680, 1700))
    walls_location.extend(range(3200, 3220))
    walls_location.extend(range(3230, 3300))
    obstacles_transition_probability = .2

    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)
        # Plotting lives inside the computeV branch: plotV defaults to True
        # while computeV defaults to False, so there may be no V to plot.
        if plotV:
            fig, ax = plt.subplots(1, 1)
            try:
                maze.domain.graph.plot_signal(np.array(V), vertex_size=60,
                                              ax=ax)
                plt.savefig('graphs/threeRooms_trueV.pdf')
            finally:
                # Release the figure even if plotting or saving fails.
                plt.close(fig)
    return maze, V
def example_grid_maze(plotV=True):
    """Build the example 10x10 grid maze and its value function.

    The domain uses reward state 9, eight obstacle states, two runs of wall
    states (50-56 and 74-79), no fixed initial state, and an obstacle
    transition probability of 0.2. The ``LearningMazeDomain`` is created
    with ``num_sample=2000`` and ``value_iteration`` is always run.

    Args:
        plotV: If true, plot the value function on the domain graph and
            save it to ``graphs/simpleMaze_trueV.pdf``.

    Returns:
        Tuple ``(maze, V)``.
    """
    n_rows, n_cols = 10, 10
    goal = 9
    start = None  # alternative noted in the original: np.array([25])
    obstacle_cells = [14, 13, 24, 23, 29, 28, 39, 38]
    # Same values, same order as the literal list in the original.
    wall_cells = list(range(50, 57)) + list(range(74, 80))
    obstacle_prob = .2

    grid = domains.GridMazeDomain(n_rows, n_cols, goal, wall_cells,
                                  obstacle_cells, start, obstacle_prob)
    maze = LearningMazeDomain(domain=grid, num_sample=2000)
    V = value_iteration(maze.domain.graph, goal, obstacle_cells, wall_cells,
                        obstacle_prob)

    if not plotV:
        return maze, V

    fig, ax = plt.subplots(1, 1)
    maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
    plt.savefig('graphs/simpleMaze_trueV.pdf')
    plt.close()
    return maze, V