def run_no_speech(task_block, task_room, photo_pos, drone_pos, pub, drone_path):
    """
    Assume the block is on the floor of each cell.
    Get the initial position of the drone from the caller.
    """
    height = 2  # vertical space
    task = DroneTask(task_block, task_room)
    room1 = DroneRoom("room1", [(x, y, z) for x in range(0, 4) for y in range(0, 1) for z in range(height)], color="red")
    room2 = DroneRoom("room2", [(x, y, z) for x in range(0, 2) for y in range(2, 4) for z in range(height)], color="green")
    room3 = DroneRoom("room3", [(x, y, z) for x in range(3, 4) for y in range(2, 4) for z in range(height)], color="blue")
    block1 = DroneBlock("block1", photo_pos[0], photo_pos[1], photo_pos[2] - 1, color="photo")
    rooms = [room1, room2, room3]
    blocks = [block1]
    doors = [DroneDoor(1, 1, height), DroneDoor(3, 1, height)]
    mdp = DroneMDP(drone_pos, task, rooms=rooms, blocks=blocks, doors=doors)

    print("Start Value Iteration")
    vi = ValueIteration(mdp)
    vi.run_vi()
    action_seq, state_seq = vi.plan(mdp.init_state)
    policy = defaultdict()
    for i in range(len(action_seq)):
        policy[state_seq[i]] = action_seq[i]

    print("Start Flying")
    mdp.send_path(policy, pub, drone_path)
def main():
    height = 2  # vertical space
    task = DroneTask("red", "None")
    room1 = DroneRoom("room1", [(x, y, z) for x in range(0, 4) for y in range(0, 1) for z in range(height)], color="red")
    room2 = DroneRoom("room2", [(x, y, z) for x in range(0, 2) for y in range(2, 3) for z in range(height)], color="green")
    room3 = DroneRoom("room3", [(x, y, z) for x in range(3, 4) for y in range(2, 3) for z in range(height)], color="blue")
    block1 = DroneBlock("block1", 0, 2, 0, color="red")
    block2 = DroneBlock("block2", 2, 0, -1, color="green")
    block3 = DroneBlock("block3", 3, 2, 0, color="blue")
    rooms = [room1, room2, room3]
    blocks = [block1, block2, block3]
    doors = [DroneDoor(1, 1, height), DroneDoor(3, 1, height)]
    mdp = DroneMDP((0, 0, 0), task, rooms=rooms, blocks=blocks, doors=doors)

    # print("Start Q learning")
    # ql_agent = QLearningAgent(actions=mdp.get_actions())
    # # run_agents_on_mdp([ql_agent], mdp, instances=2, episodes=2500, steps=100, reset_at_terminal=True, verbose=True)
    # run_single_agent_on_mdp(ql_agent, mdp, episodes=2000, steps=200)

    print("Start Value Iteration")
    vi = ValueIteration(mdp)
    vi.run_vi()
    action_seq, state_seq = vi.plan(mdp.init_state)
    policy = defaultdict()
    for i in range(len(action_seq)):
        policy[state_seq[i]] = action_seq[i]

    print("Start AirSim")
    # mdp.visualize_agent(ql_agent)
    mdp.visualize_policy(policy)
def plan_with_vi(gamma=0.99):
    '''
    Run value iteration on the problem to test the correctness of the
    policy returned by BSS.

    Args:
        gamma (float): discount factor
    '''
    mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()
    action_seq, state_seq = value_iter.plan(mdp.get_init_state())

    print("[ValueIteration] Plan for {}".format(mdp))
    for i in range(len(action_seq)):
        print("pi({}) --> {}".format(state_seq[i], action_seq[i]))
def get_policy(self, mdp, verbose=False):
    '''
    Args:
        mdp (MDP): MDP (same level as the current Policy Generator)

    Returns:
        policy (defaultdict): optimal policy in mdp
    '''
    vi = ValueIteration(mdp, sample_rate=1)
    vi.run_vi()
    policy = defaultdict()
    action_seq, state_seq = vi.plan(mdp.init_state)

    if verbose:
        print('Plan for {}:'.format(mdp))
    for i in range(len(action_seq)):
        if verbose:
            print("\tpi[{}] -> {}".format(state_seq[i], action_seq[i]))
        policy[state_seq[i]] = action_seq[i]
    return policy
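# The plan-to-policy loop above recurs in several of these snippets. The sketch
# below shows the same pattern as a standalone function against simple_rl's
# public GridWorldMDP/ValueIteration API; the function name and grid layout are
# illustrative assumptions, not part of the original source.
def plan_to_policy_sketch():
    from collections import defaultdict
    from simple_rl.tasks import GridWorldMDP
    from simple_rl.planning import ValueIteration

    mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1), goal_locs=[(4, 3)])
    vi = ValueIteration(mdp)
    vi.run_vi()
    action_seq, state_seq = vi.plan(mdp.get_init_state())

    # zip() pairs each visited state with the action the plan takes there.
    policy = defaultdict(lambda: None)
    for state, action in zip(state_seq, action_seq):
        policy[state] = action
    return policy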
def planFromAtoB(self, Maps, nearestVertex, kStepConfig):
    # if not self.computedMDP:
    #     self.wallLocations = []
    #     for x in range(len(self.Maps.occupancyMap)):
    #         for y in range(len(self.Maps.occupancyMap[x])):
    #             if self.Maps.occupancyMap[x][y] == Env.WALL:
    #                 self.wallLocations.append(Loc.Location(x, y))
    #     self.computedMDP = True
    mdp = GridWorldMDP(width=len(Maps.occupancyMap),
                       height=len(Maps.occupancyMap[0]),
                       init_loc=(nearestVertex.x, nearestVertex.y),
                       goal_locs=[(kStepConfig.x, kStepConfig.y)],
                       gamma=0.95)
    vi = ValueIteration(mdp)
    vi.run_vi()
    action_seq, state_seq = vi.plan(mdp.get_init_state())

    # Check whether the planned path conflicts with a wall.
    # GridWorldState exposes integer x and y attributes.
    for s in state_seq:
        if Maps.occupancyMap[s.x][s.y] == Env.WALL:
            return False
    return True
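# A self-contained variant of the conflict check above, using only simple_rl's
# public API. The occupancy grid, WALL value, and indexing convention are
# illustrative assumptions: simple_rl's GridWorldMDP states are 1-indexed,
# hence the -1 offsets into a 0-indexed grid.
def path_avoids_walls_sketch(occupancy, start, goal, wall=1):
    from simple_rl.tasks import GridWorldMDP
    from simple_rl.planning import ValueIteration

    mdp = GridWorldMDP(width=len(occupancy), height=len(occupancy[0]),
                       init_loc=start, goal_locs=[goal], gamma=0.95)
    vi = ValueIteration(mdp)
    vi.run_vi()
    _, state_seq = vi.plan(mdp.get_init_state())
    return all(occupancy[s.x - 1][s.y - 1] != wall for s in state_seq)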
def main():
    # This accepts arguments from the command line with flags.
    # Example usage: python value_iteration_demo.py -w 4 -H 3 -s 0.05 -g 0.95 -il "(0,0)" -gl "[(4,3)]"
    parser = argparse.ArgumentParser(
        description='Run a demo that shows a visualization of value iteration on a GridWorld MDP')

    # Add the relevant arguments to the argparser
    parser.add_argument('-w', '--width', type=int, nargs="?", const=5, default=5,
                        help='an integer representing the number of cells for the GridWorld width')
    parser.add_argument('-H', '--height', type=int, nargs="?", const=5, default=5,
                        help='an integer representing the number of cells for the GridWorld height')
    parser.add_argument('-s', '--slip', type=float, nargs="?", const=0.05, default=0.05,
                        help='a float representing the probability that the agent will "slip" and not take the intended action')
    parser.add_argument('-g', '--gamma', type=float, nargs="?", const=0.95, default=0.95,
                        help='a float representing the discount factor for Value Iteration')
    # ast.literal_eval (requires `import ast`) parses "(0,0)"-style strings into
    # Python literals; plain type=tuple would split the string into characters.
    parser.add_argument('-il', '--i_loc', type=ast.literal_eval, nargs="?", const=(0, 0), default=(0, 0),
                        help='two integers representing the starting cell location of the agent [with zero-indexing]')
    parser.add_argument('-gl', '--g_loc', type=ast.literal_eval, nargs="?", const=[(3, 3)], default=[(3, 3)],
                        help='a sequence of integer-valued coordinates where the agent will receive a large reward and enter a terminal state')
    args = parser.parse_args()

    mdp = generate_MDP(args.width, args.height, args.i_loc, args.g_loc, args.gamma, args.slip)

    # Run value iteration on the MDP and save the history of value backups until convergence
    vi = ValueIteration(mdp, max_iterations=1)
    _, _, histories = vi.run_vi_histories()

    # For every value backup, print the state values. (To visualize instead,
    # pass a function to visualize_policy -- hence the lambda below.)
    for value_dict in histories:
        # mdp.visualize_policy(lambda in_state: value_dict[in_state])
        # time.sleep(0.5)
        print("========================")
        for k in value_dict.keys():
            print(str(k) + " " + str(value_dict[k]))

    print(vi.plan(state=mdp.init_state))
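# Hypothetical entry point for the demo above (the original excerpt does not
# show one). Example invocation, assuming the file is saved as
# value_iteration_demo.py as in the usage comment:
#
#   python value_iteration_demo.py -w 4 -H 3 -s 0.05 -g 0.95
#
if __name__ == '__main__':
    main()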