def helper_bfs(init_state, goal_state):
    """Breadth-first search over grid states.

    Returns the number of steps (cost) from ``init_state`` to the first
    state matching ``goal_state``'s (x, y), or ``inf`` if the frontier
    empties without reaching it.
    """
    from collections import deque
    helper = problem.Helper()
    # Visited signatures as (x, y, orientation) tuples: O(1) membership
    # instead of the original O(n) list scan.
    visited = set()
    visited.add((init_state.x, init_state.y, init_state.orientation))
    # Frontier entries: [action_path, state, depth]; deque gives O(1)
    # pops from the left (list.pop(0) was O(n)).
    frontier = deque()
    init_cost = 1
    state_dictionary = helper.get_successor(init_state)
    for key in state_dictionary:
        frontier.append([key.split(","), state_dictionary[key][0], init_cost])
    cost = float('inf')
    while frontier:
        node = frontier.popleft()
        # Skip states that fell off the grid.
        if node[1].x < 0 or node[1].y < 0:
            continue
        signature = (node[1].x, node[1].y, node[1].orientation)
        if signature in visited:
            continue
        if node[1].x == goal_state.x and node[1].y == goal_state.y:
            cost = node[2]
            break
        visited.add(signature)
        successor = helper.get_successor(node[1])
        for key in successor:
            frontier.append([node[0] + key.split(","),
                             successor[key][0], node[2] + 1])
    # NOTE(review): ``d`` and ``init_state_tuple`` are not defined in this
    # function -- presumably module-level memoization globals; confirm they
    # exist before calling, otherwise this line raises NameError.
    d[init_state_tuple] = cost
    return cost
def gbfs(use_custom_heuristic):
    """Greedy best-first search from the initial state to the goal.

    :param use_custom_heuristic: if True, use squared Euclidean distance
        to the goal as priority; otherwise Manhattan distance.
    :returns: list of action names reaching the goal ([] if none found).
    """
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    frontier = []
    # Tie-breaking counter so the heap never compares State objects
    # (equal priorities would otherwise raise TypeError on Python 3).
    counter = 0
    heapq.heappush(frontier, (0, counter, (init_state, [])))
    explored = []
    goal_actions = []
    while frontier:
        _, _, (current_state, path_actions) = heapq.heappop(frontier)
        state_dictionary = helper.get_successor(current_state)
        if helper.is_goal_state(current_state):
            print("goal found")
            goal_actions = path_actions
            break
        explored.append(current_state)
        for action in possible_actions:
            next_state = state_dictionary[action][0]
            # A step cost of -1 marks an invalid transition.
            if state_dictionary[action][1] == -1:
                continue
            if use_custom_heuristic:
                priority = ((goal_state.x - next_state.x) ** 2
                            + (goal_state.y - next_state.y) ** 2)
            else:
                # BUG FIX: the original used goal_state.x in the y-term
                # (abs(goal_state.x - next_state.y)).
                priority = (abs(goal_state.x - next_state.x)
                            + abs(goal_state.y - next_state.y))
            # Skip states already queued or already expanded.
            if any(entry[2][0] == next_state for entry in frontier):
                continue
            if any(seen == next_state for seen in explored):
                continue
            counter += 1
            heapq.heappush(frontier,
                           (priority, counter, (next_state, path_actions + [action])))
    return goal_actions
def __init__(self, task, headless=1, sample=1, episodes=1):
    """Initialise the Q-learning node, run the requested task, and dump
    the learned Q-values to ``q_values.json``.

    :param task: which task (1-5) to run.
    :param headless: run without GUI when truthy.
    :param sample: trajectory sample index (task 1 only).
    :param episodes: number of training episodes (tasks 2-5).
    :raises ValueError: if ``task`` is not in 1..5 (the original would hit
        a NameError on ``q_values`` instead).
    """
    rospy.init_node('qlearning', anonymous=True)
    root_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.path.pardir))
    self.books_json_file = root_path + "/books.json"
    self.books = json.load(open(self.books_json_file))
    self.helper = problem.Helper()
    self.helper.reset_world()
    self.headless = headless
    self.alpha = 0.3   # learning rate
    self.gamma = 0.9   # discount factor
    self.root_path = root_path
    if task == 1:
        trajectories_json_file = root_path + "/trajectories{}.json".format(sample)
        q_values = self.task1(trajectories_json_file)
    elif task == 2:
        q_values = self.task2(episodes)
    elif task == 3:
        q_values = self.task3(episodes)
    elif task == 4:
        q_values = self.task4(episodes)
    elif task == 5:
        q_values = self.task5(episodes)
    else:
        raise ValueError("task must be 1-5, got {}".format(task))
    with open(root_path + "/q_values.json", "w") as fout:
        json.dump(q_values, fout)
def ucs(use_custom_heuristic):
    """Uniform-cost search: return the cheapest action sequence from the
    initial state to the goal state ([] if unreachable).
    ``use_custom_heuristic`` is unused -- UCS is uninformed."""
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    state_dictionary = helper.get_successor(init_state)
    # Heap entries: (cumulative_cost, tie_breaker, path_with_dummy_head, state).
    heap = []
    heapq.heappush(heap, (0, 0, ["mv"], init_state))
    seen = []
    found = False
    tie = 0
    while heap:
        cost_so_far, _, path, state = heapq.heappop(heap)
        if state.x == goal_state.x and state.y == goal_state.y:
            action_list = path
            action_list.pop(0)  # drop the "mv" placeholder
            found = True
            break
        if state.x < 0 or state.y < 0:
            # state out of bounds
            continue
        if state in seen:
            # already visited
            continue
        seen.append(state)
        for move, (next_state, step_cost) in helper.get_successor(state).items():
            tie += 1
            heapq.heappush(heap,
                           (cost_so_far + step_cost, tie, path + [move], next_state))
    if not found:
        action_list = []
    return action_list
def states_from_action_list(action_list):
    """Replay ``action_list`` from the initial state and pretty-print the
    state the robot occupies *before* each action is applied."""
    helper = problem.Helper()
    current = helper.get_initial_state()
    trace = []
    for step in action_list:
        trace.append(current)
        # get_successor maps action -> (next_state, cost); take the state.
        current = helper.get_successor(current)[step][0]
    pp.pprint(trace)
def bfs(use_custom_heuristic):
    """Breadth-first search returning a list of actions from the initial
    state to the goal state ([] if the goal is never reached).
    ``use_custom_heuristic`` is ignored -- BFS is uninformed."""
    from collections import deque
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    # deque gives O(1) popleft; list.pop(0) in the original was O(n).
    frontier = deque([(init_state, [])])  # (state, actions-to-reach-it)
    explored = []
    goal_actions = []
    while frontier:
        current_state, path_actions = frontier.popleft()
        state_dictionary = helper.get_successor(current_state)
        if helper.is_goal_state(current_state):
            print("goal found")
            goal_actions = path_actions
            break
        explored.append(current_state)
        for action in state_dictionary.keys():
            next_state = state_dictionary[action][0]
            # A step cost of -1 marks an invalid move.
            if state_dictionary[action][1] == -1:
                continue
            # Linear scans kept: State equality is project-defined, so no
            # hashability is assumed.
            if any(queued[0] == next_state for queued in frontier):
                continue
            if next_state in explored:
                continue
            frontier.append((next_state, path_actions + [action]))
    return goal_actions
def bfs(use_custom_heuristic, use_debug_mode):
    """Breadth-first search for an action sequence to the goal.
    In debug mode the canned ``debug_sequence`` is returned unchanged."""
    global debug_sequence
    if use_debug_mode:
        return debug_sequence
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    counter = 0  # kept for parity with the other searches (tie breaking)
    state_dictionary = helper.get_successor(init_state)
    if helper.is_goal_state(init_state):
        print("Found Goal")
        return action_list
    frontier = deque([init_state])
    actions_frontier = deque([[]])  # action path parallel to frontier
    explored = set()
    counter += 1
    while True:
        if not frontier:
            print("No path possible!")
            return action_list
        node = frontier.popleft()
        node_actions = actions_frontier.popleft()
        explored.add(str(node))
        for action, (child_node, child_cost) in helper.get_successor(node).items():
            counter += 1
            # Skip children already expanded or already queued.
            if str(child_node) in explored or child_node in frontier:
                continue
            child_actions = node_actions + [action]
            if helper.is_goal_state(child_node):
                print("Found goal! : " + str(child_node))
                return child_actions
            frontier.append(child_node)
            actions_frontier.append(child_actions)
    return action_list
def bfs(use_custom_heuristic):
    """Breadth-first search for a sequence of actions from the initial
    state to the goal state; returns [] when no path exists.
    ``use_custom_heuristic`` is unused (BFS is uninformed)."""
    from collections import deque
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    state_dictionary = helper.get_successor(init_state)
    # deque replaces the thread-safe queue.Queue: same FIFO order, no
    # locking overhead in a single-threaded search.
    pending = deque()
    pending.append([["Mv"], init_state])  # "Mv" is a dummy head, dropped later
    visited = []
    result = False
    while pending:
        path, state = pending.popleft()
        if state.x == goal_state.x and state.y == goal_state.y:
            action_list = path
            action_list.pop(0)  # drop the "Mv" placeholder
            result = True
            break
        elif state.x < 0 or state.y < 0:
            # state out of bounds
            continue
        elif state in visited:
            continue
        else:
            visited.append(state)
            for key, value in helper.get_successor(state).items():
                pending.append([path + [key], value[0]])
    if not result:
        action_list = []
    return action_list
def ucs(use_custom_heuristic):
    """Uniform-cost search; returns the cheapest action list from the
    initial state to the goal ([] if unreachable).
    ``use_custom_heuristic`` is unused -- UCS is uninformed."""
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    state_dictionary = helper.get_successor(init_state)
    visited = []
    heap = []
    push_count = 0  # tie-breaker so states are never compared directly
    if not helper.is_goal_state(init_state):
        visited.append(init_state)
        for move, (child, step_cost) in state_dictionary.items():
            heapq.heappush(heap, (step_cost, push_count, [move.split(), child]))
            push_count += 1
    while heap:
        cost_so_far, _, entry = heapq.heappop(heap)
        path, state = entry
        if state.x < 0 or state.y < 0:
            # state out of bounds
            continue
        elif helper.is_goal_state(state):
            action_list = path
            break
        elif state in visited:
            continue
        else:
            visited.append(state)
            for move, (child, step_cost) in helper.get_successor(state).items():
                heapq.heappush(heap, (cost_so_far + step_cost, push_count,
                                      [path + move.split(), child]))
                push_count += 1
    return action_list
def ucs(use_custom_heuristic):
    """Uniform-cost search returning the cheapest action sequence from
    the initial state to the goal ([] when the start already is the goal
    or no path exists). ``use_custom_heuristic`` is unused."""
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    state_dictionary = helper.get_successor(init_state)
    # BUG FIX: the original guard used ``and`` and therefore skipped the
    # whole search whenever the start matched the goal on just one axis.
    if init_state.x == goal_state.x and init_state.y == goal_state.y:
        return action_list
    visited = [[init_state.x, init_state.y, init_state.orientation]]
    # A heap with an insertion counter replaces re-sorting the whole queue
    # every iteration; ties pop in FIFO order, matching the stable sort.
    heap = []
    tie = 0
    for key in state_dictionary:
        heap.append((state_dictionary[key][1], tie,
                     key.split(","), state_dictionary[key][0]))
        tie += 1
    heapq.heapify(heap)
    while heap:
        cost, _, path, state = heapq.heappop(heap)
        if state.x < 0 or state.y < 0:
            continue
        signature = [state.x, state.y, state.orientation]
        if signature in visited:
            continue
        if state.x == goal_state.x and state.y == goal_state.y:
            action_list = action_list + path
            break
        visited.append(signature)
        for key, (child, step_cost) in helper.get_successor(state).items():
            heapq.heappush(heap, (cost + step_cost, tie,
                                  path + key.split(","), child))
            tie += 1
    return action_list
def bfs(use_custom_heuristic):
    """Breadth-first search; returns the action list from the initial
    state to the goal ([] if unreachable). ``use_custom_heuristic`` is
    ignored since BFS is uninformed."""
    from collections import deque
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    state_dictionary = helper.get_successor(init_state)
    # collections.deque replaces the Python-2-only thread-safe Queue:
    # identical FIFO order, no locking overhead, portable to Python 3.
    visited = []
    pending = deque()
    if not helper.is_goal_state(init_state):
        visited.append(init_state)
        for key, values in state_dictionary.items():
            pending.append([key.split(), values[0]])
    while pending:
        path, state = pending.popleft()
        if state.x < 0 or state.y < 0:
            # state out of bounds
            continue
        elif helper.is_goal_state(state):
            action_list = path
            break
        elif state in visited:
            continue
        else:
            visited.append(state)
            for key, values in helper.get_successor(state).items():
                pending.append([path + key.split(), values[0]])
    return action_list
def task1(self, trajectories_json_file):
    """Learn Q-values from a recorded trajectory (one pass of the
    standard Q-learning update per transition).

    :param trajectories_json_file: path to a JSON list of
        {"state", "action", "reward"} records in visit order.
    :returns: dict state -> {action: q_value}.
    """
    q_values = {}
    h = problem.Helper()
    actions = h.get_all_actions()
    # 'with' closes the file; 'fin' avoids shadowing the builtin 'file'.
    with open(trajectories_json_file, 'r') as fin:
        data = json.load(fin)
    # Zero-initialise every visited state's action values.
    for record in data:
        q_values[record['state']] = {action: 0 for action in actions}
    # The original wrapped this in ``while reward <= 1`` which executed
    # exactly once; a single pass is equivalent and clearer.
    for i in range(len(data) - 1):
        state = data[i]['state']
        action = data[i]['action']
        q_values[state][action] = (
            q_values[state][action] * (1 - self.alpha)
            + self.alpha * (data[i]['reward']
                            + self.gamma * max(q_values[data[i + 1]['state']].values())))
    return q_values
def __init__(self, plan_file, planner):
    """Set up the refinement node and immediately start executing the plan.

    :param plan_file: Path to the file holding the high-level plan
        (generally written where the planner was invoked).
    :type plan_file: str
    :param planner: Planner that produced the plan ("FF" or "FD"); this
        decides how the plan file is parsed.
    :type planner: str

    Attributes:
        **status_subscriber**: subscription to "/status" (robot State).
        **actions_queue** (tuple(action_name, action_params)): refined
            action tuples handed to RobotActionServer one at a time.
        **action_index** (int): index of the next action to execute.
    """
    rospy.init_node('listener', anonymous=True)
    self.status_subscriber = rospy.Subscriber('/status', String,
                                              self.status_callback)
    self.helper = problem.Helper()
    self.action_index = 0
    self.actions_queue = self.refine_plan(plan_file, planner)
    self.execute_action()
    rospy.spin()
def task1(self, trajectories_json_file):
    """Learn Q-values from a recorded trajectory JSON file.

    :param trajectories_json_file: path to a JSON list of
        {"state", "action", "reward"} records in visit order.
    :returns: dict state -> {action: q_value}.
    """
    helper = problem.Helper()
    q_values = {}
    all_actions = helper.get_all_actions()
    # 'with' closes the trajectory file (the original leaked the handle).
    with open(trajectories_json_file) as fin:
        records = json.load(fin)
    # Sentinel "next" record for the final transition: same state, zero reward.
    last = copy(records[-1])
    last["reward"] = 0
    # Zero-initialise every visited state's action values.
    for entry in records:
        q_values[entry["state"]] = {a: 0 for a in all_actions}
    for entry, next_entry in zip(records, records[1:] + [last]):
        state = entry["state"]
        action = entry["action"]
        reward = entry["reward"]
        next_q = q_values[next_entry["state"]]
        # Standard Q-learning update; the original dropped the
        # (1 - alpha) * Q_old term, disagreeing with the other task1
        # implementation in this project.
        q_old = q_values[state][action]
        q_values[state][action] = ((1 - self.alpha) * q_old
                                   + self.alpha * (reward + self.gamma * max(next_q.values())))
    # Debug dump of the full table removed (stray stdout side effect).
    return q_values
def execute_action(self):
    """Send the next queued action to RobotActionServer; a no-op once
    the queue is exhausted.

    :rtype: None
    """
    helper = problem.Helper()
    if self.action_index >= len(self.actions_queue):
        return
    action = self.actions_queue[self.action_index]
    self.action_index += 1
    print("Executing ", action[0])
    # Dispatch table replaces the if/elif chain; unknown names are ignored,
    # matching the original's fall-through behaviour.
    dispatch = {
        'move': lambda p: helper.execute_move_action(p),
        'pick': lambda p: helper.execute_pick_action(p[0], p[1]),
        'collect': lambda p: helper.execute_collect_action(p[0], p[1]),
        'serve': lambda p: helper.execute_serve_action(p[0], p[1], p[2]),
        'throw': lambda p: helper.execute_throw_action(p[0], p[1], p[2]),
    }
    handler = dispatch.get(action[0])
    if handler is not None:
        handler(action[1])
    return
__docformat__ = 'reStructuredText'

# Standard library
import argparse
import heapq
import json
import os
import time

# Third-party / ROS
import numpy as np
import rospy
from std_msgs.msg import String

# Project-local
import problem
from problem import State
from robot import Robot

# Shared module state used by the planning helpers.
helper = problem.Helper()
object_dict = {}                # latest unplaced-book metadata (see below)
bin_subject_name_dict = {}      # bin name -> subject mapping
current_occupied_locs = set()   # grid cells currently occupied
age_dict = {}
unplaced_books = set()
priority_queue = []
current_state = ()
robot = Robot()


def getLatestObjectDictionary():
    """Refresh the module-level ``object_dict`` from the helper's current
    set of unplaced books."""
    global object_dict
    object_dict = helper.get_unplaced_books()
def task2(self, episodes):
    """Tabular Q-learning with an epsilon-greedy policy against the live
    environment.

    :param episodes: number of training episodes.
    :returns: dict state(json string) -> {action: q_value}.
    """
    helper = problem.Helper()
    init_state1 = helper.get_current_state()   # dict form of the state
    init_state = json.dumps(init_state1)       # canonical string key
    all_actions = helper.get_all_actions()
    # Zero-initialise Q-values for the initial state.
    state_dict = {init_state: {action: 0 for action in all_actions}}
    reward_list = []
    iteration_list = []
    for i in range(episodes):
        state = init_state      # string key of the current state
        stated = init_state1    # dict form of the current state
        reward_prev = 0
        done = False
        step = 1
        while not done:
            # One more transition executes after the terminal check, as in
            # the original (flag set here, loop exits after this body).
            if helper.is_terminal_state(stated):
                done = True
            # Epsilon decays linearly with the episode index, floored at 0.05.
            epsilon = max(0.05, 0.7 - (0.05 * (i + 1)))
            # Greedy action: sorted is stable, so ties resolve to the action
            # listed last among the maxima (kept for parity with original).
            greedy_action = sorted(state_dict[state].items(),
                                   key=lambda t: t[1])[-1][0]
            if random.random() < epsilon:
                action_to_perform = random.choice(all_actions)
            else:
                action_to_perform = greedy_action
            # Split "name book_x bin_y" into action name and parameters.
            parsed = action_to_perform.split(" ")
            param_cont = {}
            for token in parsed[1:]:
                if token[:4] == "book":
                    param_cont["book_name"] = token
                else:
                    param_cont["bin_name"] = token
            success, next_state1 = helper.execute_action(parsed[0], param_cont)
            next_state = json.dumps(next_state1)
            # Direct membership test replaces the original O(n) scan over
            # state_dict.keys().
            if next_state not in state_dict:
                state_dict[next_state] = {action: 0 for action in all_actions}
            reward = helper.get_reward(stated, parsed[0], next_state1)
            # Discounted cumulative reward for logging.
            reward_cum = reward_prev + ((self.gamma ** step) * reward)
            q_old = state_dict[state][action_to_perform]
            # Standard Q-learning update.
            state_dict[state][action_to_perform] = (
                (1 - self.alpha) * q_old
                + self.alpha * (reward + self.gamma * max(state_dict[next_state].values())))
            state = next_state
            stated = next_state1
            reward_prev = reward_cum
            step += 1
        print(str(i) + "," + str(reward_cum))
        reward_list = reward_list + [reward_cum]
        iteration_list = iteration_list + [i]
        helper.reset_world()
    return state_dict
def __init__(self):
    """Cache a problem.Helper instance for use by the other methods."""
    self.helper = problem.Helper()
def task3(self, episodes, books):
    """Q-learning with a pruned action set: pick actions are dropped when
    the robot is not at a book load location, place actions when it is
    not at a bin (trolly) load location.

    :param episodes: number of training episodes.
    :param books: parsed books.json dict with book/bin load locations.
    :returns: dict state(json string) -> {action: q_value}.
    """

    def prune_actionlist(state1, book):
        """Return the helper's action list minus inapplicable pick/place
        actions for the robot's current location."""
        all_action2 = helper.get_all_actions()
        robot_loc = [state1["robot"]["x"], state1["robot"]["y"]]
        # Assumes each load_loc is a list of [x, y] cells -- confirm
        # against books.json.
        book_locs = (book["books"]["book_1"]["load_loc"]
                     + book["books"]["book_2"]["load_loc"])
        bin_locs = (book["bins"]["trolly_1"]["load_loc"]
                    + book["bins"]["trolly_2"]["load_loc"])
        # BUG FIX: the original removed elements from the list *while
        # iterating it*, silently skipping every other matching action,
        # and compared against trolly_1's cells where trolly_2's were
        # clearly intended.
        pruned = all_action2
        if robot_loc not in book_locs:
            pruned = [a for a in pruned
                      if not (a[:12] == "careful_pick" or a[:11] == "normal_pick")]
        if robot_loc not in bin_locs:
            pruned = [a for a in pruned
                      if not (a[:13] == "careful_place" or a[:12] == "normal_place")]
        return pruned

    helper = problem.Helper()
    init_state1 = helper.get_current_state()   # dict form
    init_state = json.dumps(init_state1)       # canonical string key
    all_actions = helper.get_all_actions()
    # Zero-initialise Q-values for the initial state over its pruned actions.
    state_dict = {init_state: {a: 0 for a in prune_actionlist(init_state1, books)}}
    reward_list = []
    iteration_list = []
    for i in range(episodes):
        state = init_state
        stated = init_state1
        reward_prev = 0
        done = False
        step = 1
        while not done:
            if helper.is_terminal_state(stated):
                done = True
            # NOTE(review): decays with i here, i + 1 in task2 -- kept as-is.
            epsilon = max(0.05, 0.7 - (0.05 * i))
            greedy_action = sorted(state_dict[state].items(),
                                   key=lambda t: t[1])[-1][0]
            if random.random() < epsilon:
                action_to_perform = random.choice(prune_actionlist(stated, books))
            else:
                action_to_perform = greedy_action
            parsed = action_to_perform.split(" ")
            param_cont = {}
            for token in parsed[1:]:
                if token[:4] == "book":
                    param_cont["book_name"] = token
                else:
                    param_cont["bin_name"] = token
            success, next_state1 = helper.execute_action(parsed[0], param_cont)
            next_state = json.dumps(next_state1)
            # Direct membership test replaces the O(n) scan over keys().
            if next_state not in state_dict:
                state_dict[next_state] = {a: 0
                                          for a in prune_actionlist(next_state1, books)}
            reward = helper.get_reward(stated, parsed[0], next_state1)
            reward_cum = reward_prev + ((self.gamma ** step) * reward)
            q_old = state_dict[state][action_to_perform]
            # Standard Q-learning update.
            state_dict[state][action_to_perform] = (
                (1 - self.alpha) * q_old
                + self.alpha * (reward + self.gamma * max(state_dict[next_state].values())))
            state = next_state
            stated = next_state1
            reward_prev = reward_cum
            step += 1
        print(str(i) + "," + str(reward_cum))
        reward_list = reward_list + [reward_cum]
        iteration_list = iteration_list + [i]
        helper.reset_world()
    return state_dict
def astar(use_custom_heuristic):
    """A* search for an action sequence from the initial to the goal state.

    With ``use_custom_heuristic`` the displacement estimate is adjusted by
    +/-1 from the state's orientation; otherwise it is the signed x+y
    displacement. Returns [] when no path is found.
    """
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    state_dictionary = helper.get_successor(init_state)

    if use_custom_heuristic:
        def heuristic(state):
            # Orientation-aware variant of the displacement estimate.
            estimate = goal_state.x - state.x + goal_state.y - state.y
            if state.orientation in ['WEST', 'SOUTH']:
                return estimate + 1
            return estimate - 1
    else:
        def heuristic(state):
            # NOTE(review): not abs() -- goes negative past the goal,
            # preserved from the original.
            return goal_state.x - state.x + goal_state.y - state.y

    # Single search loop shared by both heuristics (the original duplicated
    # the entire loop in each branch).
    heap = []
    heapq.heappush(heap, (heuristic(init_state), 0, ["mv"], init_state, 0))
    visited = []
    found = False
    counter = 0
    action_list = []
    while heap:
        current = heapq.heappop(heap)
        if current[3].x == goal_state.x and current[3].y == goal_state.y:
            action_list = current[2]
            action_list.pop(0)  # drop the "mv" placeholder
            found = True
            break
        elif current[3].x < 0 or current[3].y < 0:
            # state out of bounds
            continue
        elif current[3] in visited:
            # already expanded
            continue
        else:
            visited.append(current[3])
            for key, value in helper.get_successor(current[3]).items():
                counter += 1
                path_list = current[2] + [key]
                total_cost = current[4] + value[1]
                # f = g + h; NOTE(review): h is evaluated at the *parent*
                # state, exactly as in the original.
                astar_val = heuristic(current[3]) + total_cost
                heapq.heappush(heap, (astar_val, counter, path_list,
                                      value[0], total_cost))
    if not found:
        action_list = []
    return action_list
class Refine:
    """Refines the high-level actions used by PDDL into the low-level
    actions used by the TurtleBot and feeds them to RobotActionServer."""

    # PDDL time word -> integer. NOTE(review): the original compared
    # against undefined bare names (one, two, ...) and used ``else`` with
    # a condition (a syntax error); plan tokens are assumed to be these
    # strings -- confirm against the planner output.
    _TIME_WORDS = {'one': 1, 'two': 2, 'three': 3, 'four': 4}

    def __init__(self, plan_file, planner):
        """
        :param plan_file: Path to the file where the plan is stored
            (generally where the planner was invoked).
        :type plan_file: str
        :param planner: Planner that generated the plan ("FF" or "FD");
            decides how the plan file is parsed.
        :type planner: str

        Attributes:
            status_subscriber: subscription to "/status" (robot State).
            actions_queue: refined (action_name, action_params) tuples.
            action_index: index of the next action to execute.
        """
        rospy.init_node('listener', anonymous=True)
        self.status_subscriber = rospy.Subscriber('/status', String,
                                                  self.status_callback)
        self.helper = problem.Helper()
        self.action_index = 0
        print(plan_file)
        self.actions_queue = self.refine_plan(plan_file, planner)
        self.execute_action()
        rospy.spin()

    def status_callback(self, data):
        """When the robot reports Idle, dispatch the next queued action."""
        if data.data == "Idle":
            self.execute_action()

    def execute_action(self):
        """Send the next action from self.actions_queue to
        RobotActionServer; a no-op once the queue is exhausted.

        :rtype: None
        """
        helper = problem.Helper()
        if self.action_index >= len(self.actions_queue):
            return
        action = self.actions_queue[self.action_index]
        self.action_index += 1
        print("Executing ", action[0])
        if action[0] == 'move':
            helper.execute_move_action(action[1])
        elif action[0] == 'pick':
            helper.execute_pick_action(action[1][0], action[1][1])
        elif action[0] == 'collect':
            helper.execute_collect_action(action[1][0], action[1][1])
        elif action[0] == 'serve':
            helper.execute_serve_action(action[1][0], action[1][1], action[1][2])
        elif action[0] == 'throw':
            helper.execute_throw_action(action[1][0], action[1][1], action[1][2])
        return

    def refine_plan(self, plan_file, planner):
        """Perform downward refinement of the high-level plan.

        :param plan_file: path to the plan file.
        :type plan_file: str
        :param planner: "FF" or "FD" (decides the plan file format).
        :type planner: str
        :returns: list of refined (action_name, params) tuples; [] when a
            move action cannot be refined to its destination.
        :rtype: list(tuples)
        """

        def convert_time(time_name):
            # BUG FIX: the original took a parameter named time_name1 but
            # read time_name, compared against undefined bare names, and
            # had ``else <condition>:`` (a SyntaxError). Anything outside
            # the table falls back to 4, matching the original's intent.
            return Refine._TIME_WORDS.get(time_name, 4)

        objects_dict = self.parse_object_data()
        helper = problem.Helper()
        current_state = helper.get_initial_state()
        action_list = []
        with open(plan_file) as f:
            plan_sequence = f.readlines()[:-1]  # last line is planner metadata
        for action_info in plan_sequence:
            # Strip the surrounding "(" and ")\n" then split into tokens.
            action_info_list = action_info[1:-2].split()
            action = action_info_list[0]
            if action == 'pick':
                bowl_name = action_info_list[1]
                # Parsed for validation; not forwarded (as in the original).
                time = convert_time(action_info_list[4])
                action_list.append((action, (bowl_name, current_state)))
            elif action == 'collect':
                bowl_name = action_info_list[1]
                action_list.append((action, (bowl_name, current_state)))
            elif action == 'serve':
                bowl_name = action_info_list[1]
                # BUG FIX: original referenced misspelled action_infor_list.
                time_name = action_info_list[5]  # currently unused
                table_name = action_info_list[-2]
                action_list.append((action, (bowl_name, table_name, current_state)))
            elif action == 'throw':
                bowl_name = action_info_list[1]
                dustbin = action_info_list[-2]
                action_list.append((action, (bowl_name, dustbin, current_state)))
            elif action == 'move':
                # Destination object name carries a 5-char suffix to strip.
                if "bowl" in action_info_list[-1][:-5]:
                    destination_locations = objects_dict.get('kitchen_table').get('load_loc')
                else:
                    destination_locations = objects_dict.get(
                        action_info_list[-1][:-5]).get('load_loc')
                print(destination_locations)
                moves_list, destination_state, goal_reached = self.get_path(
                    current_state, destination_locations)
                if not goal_reached:
                    return []
                action_list.append((action, moves_list))
                current_state = destination_state
        print(action_list)
        return action_list
def ucs(use_custom_heuristic, use_debug_mode):
    """Uniform-cost search for an action sequence to the goal; in debug
    mode the canned ``debug_sequence`` is returned directly.
    ``use_custom_heuristic`` is unused -- UCS is uninformed."""
    global debug_mode
    global debug_sequence
    if use_debug_mode:
        return debug_sequence
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    counter = 0  # breaks priority ties so State objects are never compared
    if helper.is_goal_state(init_state):
        print("Found Goal")
        return action_list
    # Frontier entries: [cumulative_cost, counter, actions, node].
    frontier = []
    explored = set()
    heapq.heappush(frontier, [0.0, counter, [], init_state])
    counter += 1
    while True:
        if len(frontier) == 0:
            print("No path possible!")
            return action_list
        node_cum_cost, _, node_actions, node = heapq.heappop(frontier)
        if helper.is_goal_state(node):
            return node_actions
        explored.add(str(node))
        sucs = helper.get_successor(node)
        for action in sucs:
            counter += 1
            child_node, child_cost = sucs[action]
            child_cost += node_cum_cost
            if str(child_node) in explored:
                continue
            index = findnode(frontier, child_node)
            if index != -1:
                if frontier[index][0] <= child_cost:
                    # An at-least-as-cheap entry already exists; the
                    # original pushed a duplicate here anyway.
                    continue
                # BUG FIX: deleting from the middle of a heap list breaks
                # the heap invariant -- restore it before pushing the
                # cheaper replacement.
                del frontier[index]
                heapq.heapify(frontier)
            heapq.heappush(frontier,
                           [child_cost, counter, node_actions + [action], child_node])
    return action_list
def astar(use_custom_heuristic, use_debug_mode): ''' Perform A* search to find sequence of actions from initial state to goal state ''' global debug_mode global debug_sequence if use_debug_mode: return debug_sequence helper = problem.Helper() init_state = helper.get_initial_state() goal_state = helper.get_goal_state() possible_actions = helper.get_actions() action_list = [] counter = 0 if helper.is_goal_state(init_state): print "Found Goal" return action_list ''' Use priority queue to keep track of path and Path Cost ''' root = init_state frontier = [] explored = set() if use_custom_heuristic: heuristic = custom_heuristic2(root, goal_state) else: heuristic = manhattan(root, goal_state) node_cum_cost = 0.0 item = [heuristic, counter, node_cum_cost, [], root] heapq.heappush(frontier, item) counter += 1 while True: if len(frontier) == 0: print "No path possible!" return action_list item = heapq.heappop(frontier) node = item.pop() node_actions = item.pop() node_cum_cost = item.pop() if helper.is_goal_state(node): return node_actions explored.add(str(node)) sucs = helper.get_successor(node) for action in sucs: counter += 1 actions = node_actions[:] child_node, child_cost = sucs[action] child_cost += node_cum_cost if use_custom_heuristic: child_heuristic = custom_heuristic2(child_node, goal_state) else: child_heuristic = manhattan(child_node, goal_state) if (str(child_node) not in explored): actions.append(action) total_cost = child_heuristic + child_cost item = [total_cost, counter, child_cost, actions, child_node] heapq.heappush(frontier, item) return action_list
def astar(use_custom_heuristic):
    '''
    Perform A* search to find a sequence of actions from the initial state
    to the goal state.

    Parameters:
        use_custom_heuristic: if True, expand children with
            astar_custom_heuristic; otherwise with astar_manhattan_heuristic.

    Returns:
        list of actions reaching the goal, or an empty list when the initial
        state is already at the goal or no path is found.

    Queue entries come from the heuristic expanders as
    [actions, state, g_cost, f_score]; the queue is sorted on f_score
    (element 3) before each pop.
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    # action -> (state, cost) mapping from the initial state
    state_dictionary = helper.get_successor(init_state)
    # BUG FIX: the original guarded the search with
    # `init.x != goal.x and init.y != goal.y`, so when exactly one
    # coordinate already matched the goal the search was skipped and an
    # empty plan was wrongly returned. Only skip when BOTH match.
    if init_state.x == goal_state.x and init_state.y == goal_state.y:
        return action_list
    visited = [[init_state.x, init_state.y, init_state.orientation]]

    # The two heuristic expanders have different signatures; wrap the choice
    # once instead of duplicating the whole search loop per heuristic.
    def expand(successors, path, cost):
        if use_custom_heuristic:
            return astar_custom_heuristic(successors, goal_state, path, cost,
                                          init_state, visited)
        return astar_manhattan_heuristic(successors, goal_state, path, cost,
                                         visited)

    queue = expand(state_dictionary, [], 0)
    while queue:
        queue.sort(key=lambda ele: ele[3])  # pop the lowest f-score first
        node = queue.pop(0)
        if node[1].x < 0 or node[1].y < 0:
            continue  # out-of-bounds state
        if [node[1].x, node[1].y, node[1].orientation] in visited:
            continue
        if node[1].x == goal_state.x and node[1].y == goal_state.y:
            action_list = action_list + node[0]
            break
        visited.append([node[1].x, node[1].y, node[1].orientation])
        successor = helper.get_successor(node[1])
        queue = queue + expand(successor, node[0], node[2])
    return action_list
def search(algorithm, time_limit):
    """
    Performs a search using the specified algorithm.

    Parameters
    ===========
        algorithm: str
            The algorithm to be used for searching.
        time_limit: int
            The time limit in seconds to run this method for.

    Returns
    ========
        tuple(list, int)
            A tuple of the action_list and the total number of nodes
            expanded.

    Raises
    =======
        SearchTimeOutError
            If the time limit elapses before a goal state is reached.
    """
    # The helper allows us to access the problem functions.
    helper = problem.Helper()

    # Get the initial and the goal states.
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()

    # g_score tracks the cheapest path cost observed for each state.
    # This map stores data of the form state -> int.
    g_score = {}
    g_score[init_state] = 0

    # best_path remembers the predecessor Node on the cheapest path to a
    # state. This map stores data of the form state -> Node.
    best_path = {}
    best_path[init_state] = None

    # Initialize the init node of the search tree and compute its f_score.
    init_node = Node(init_state, None, 0, None, 0)
    f_score = compute_g(algorithm, init_node, goal_state) \
        + compute_h(algorithm, init_node, goal_state)

    # Initialize the fringe as a priority queue.
    priority_queue = PriorityQueue()
    priority_queue.push(f_score, init_node)

    action_list = []
    total_nodes_expanded = 0
    goal_found = False

    # BUG FIX: the original reassigned `time_limit` to the absolute
    # deadline, so the timeout message below printed an epoch timestamp
    # instead of the configured number of seconds. Keep them separate.
    deadline = time.time() + time_limit
    while not priority_queue.is_empty() and time.time() < deadline:

        # Pop the node with the smallest f_score from the fringe.
        current_node = priority_queue.pop()
        current_state = current_node.get_state()

        # If the current state is a goal state, we are done and need to
        # reconstruct the best path to the goal.
        if helper.is_goal_state(current_state):
            action_list = build_solution(best_path, current_node)
            goal_found = True
            break

        # Expand the node.
        total_nodes_expanded += 1
        action_state_dict = helper.get_successor(current_state)
        for action, state in action_state_dict.items():

            # The successor function actually stores items in the
            # action -> (next_state, cost) format.
            next_state, action_cost = state

            # Only process valid states.
            if is_invalid(next_state):
                continue

            next_node = Node(next_state, current_node,
                             current_node.get_depth() + 1, action,
                             action_cost)
            g_value = compute_g(algorithm, next_node, goal_state)

            # If a cheaper path is found to the next_state, then we can
            # add the next_state to the fringe.
            if g_value < g_score.get(next_state, float("inf")):

                # Remember the cheaper path to the next_state from the
                # current_state/current_node.
                #
                # We need the node since we want to remember the action as
                # well.
                best_path[next_state] = current_node

                # Remember the cheaper path cost.
                g_score[next_state] = g_value

                # Compute the f-value and add to the fringe (unless it is
                # already there).
                f_score = g_value + compute_h(algorithm, next_node,
                                              goal_state)
                if not priority_queue.contains(next_state):
                    priority_queue.push(f_score, next_node)

    # BUG FIX: the original raised even when the goal was found just as the
    # deadline passed; only raise when the search actually failed in time.
    if not goal_found and time.time() >= deadline:
        raise SearchTimeOutError("Search timed out after %u secs." %
                                 (time_limit))

    return action_list, total_nodes_expanded