Code example #1
def helper_bfs(init_state, goal_state):

    helper = problem.Helper()
    state_dictionary = helper.get_successor(init_state)
    visited = []
    visited.append([init_state.x, init_state.y, init_state.orientation])
    queue = list()
    init_cost = 1
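    # Each frontier entry is [action_list, successor_state, path_cost_so_far].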

    for key in state_dictionary:
        l = list(key.split(","))
        queue.append([l, state_dictionary[key][0], init_cost])
    cost = float('inf')
    while (queue):

        node = queue.pop(0)
        if node[1].x < 0 or node[1].y < 0:
            continue
        else:
            if [node[1].x, node[1].y, node[1].orientation] not in visited:
                if node[1].x == goal_state.x and node[1].y == goal_state.y:
                    cost = node[2]
                    break
                else:
                    visited.append([node[1].x, node[1].y, node[1].orientation])
                    successor = helper.get_successor(node[1])

                    for key in successor:
                        l1 = list(key.split(","))
                        queue.append(
                            [node[0] + l1, successor[key][0], node[2] + 1])
    # Memoize the result; `d` and `init_state_tuple` are assumed to be defined
    # at module level (a memo dict and the tuple key for the initial state).
    d[init_state_tuple] = cost
    return cost
Code example #2
def gbfs(use_custom_heuristic):
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    state_dictionary = helper.get_successor(init_state)
    path_cost = 0
    action_list1 = []
    node = (init_state, action_list1)
    frontier = []
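    # Greedy best-first search: the frontier is ordered purely by the
    # heuristic value of each successor state (path cost is ignored).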
    priority = 0
    heapq.heappush(frontier, (priority, node))
    explored = []
    action_list100 = []

    while (len(frontier) != 0):

        popped_node = heapq.heappop(frontier)
        current_state, action_list1 = popped_node[1]
        action_list = []
        state_dictionary = helper.get_successor(current_state)
        if (helper.is_goal_state(current_state)):
            # print "goal_state found = ", current_state
            print "goal found"
            action_list100 = action_list1
            break
        explored.append(current_state)
        for action in possible_actions:
            newaction = [action]
            action_list = action_list1 + newaction
            next_state = state_dictionary[action][0]
            if use_custom_heuristic:
                # Custom heuristic: squared Euclidean distance to the goal.
                priority = (goal_state.x - next_state.x)**2 + (goal_state.y -
                                                               next_state.y)**2
            else:
                # Default heuristic: Manhattan distance to the goal.
                priority = abs(goal_state.x -
                               next_state.x) + abs(goal_state.y - next_state.y)
            if (state_dictionary[action][1] == -1):
                continue
            child = (next_state, action_list)
            k = len(explored)
            l = len(frontier)
            flag1 = 0
            flag2 = 0
            for i in range(l):
                if (frontier[i][1][0] == next_state):
                    flag1 = 1
                    break

            for j in range(k):
                if (explored[j] == next_state):
                    flag2 = 1
                    break

            if (flag1 == 1 or flag2 == 1):
                continue
            if (flag1 == 0 and flag2 == 0):
                heapq.heappush(frontier, (priority, child))

    return action_list100
Code example #3
    def __init__(self, task, headless=1, sample=1, episodes=1):
        rospy.init_node('qlearning', anonymous=True)
        root_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), os.path.pardir))

        self.books_json_file = root_path + "/books.json"
        self.books = json.load(open(self.books_json_file))
        self.helper = problem.Helper()
        self.helper.reset_world()

        self.headless = headless
        self.alpha = 0.3
        self.gamma = 0.9
        self.root_path = root_path

        if (task == 1):
            trajectories_json_file = root_path + "/trajectories{}.json".format(
                sample)
            q_values = self.task1(trajectories_json_file)
        elif (task == 2):
            q_values = self.task2(episodes)
        elif (task == 3):
            q_values = self.task3(episodes)
        elif (task == 4):
            q_values = self.task4(episodes)
        elif (task == 5):
            q_values = self.task5(episodes)
        else:
            # Guard against an out-of-range task id, which would otherwise
            # leave q_values undefined when it is dumped below.
            raise ValueError("task must be between 1 and 5, got {}".format(task))

        with open(root_path + "/q_values.json", "w") as fout:
            json.dump(q_values, fout)
Code example #4
def ucs(use_custom_heuristic):
    '''
    Perform UCS to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions() 
    action_list = []

    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)

    L_heap = []
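    # Heap entries are (cumulative_cost, tie_breaking_counter, action_list, state);
    # "mv" is a placeholder first action that is stripped from the final path.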
    heapq.heappush(L_heap, (0,0, ["mv"],init_state))

    #L.put([["Mv"],init_state])
    visited=[]

    result=False
    counter=0
    while L_heap:
        #current=L.get()
        current=heapq.heappop(L_heap)
        if current[3].x==goal_state.x and current[3].y==goal_state.y:


            action_list=current[2]
            action_list.pop(0)
            result=True
            break
        elif current[3].x<0 or current[3].y<0:
            #print('state out of bound')
            continue
        elif current[3] in visited:
            #print('Already Visited')
            continue
        else:
            #print('New State')
            next_states=helper.get_successor(current[3])
            visited.append(current[3])
            for key,value in next_states.items():
                path_list=current[2]
                path_list=path_list+[key]
                total_cost=current[0]+value[1]
                counter=counter+1

                heapq.heappush(L_heap, (total_cost,counter, path_list, value[0]))

    if not result:

        action_list=[]

    '''
    YOUR CODE HERE
    '''

    return action_list
Code example #5
def states_from_action_list(action_list):
    helper = problem.Helper()
    state = helper.get_initial_state()
    states = []
    for action in action_list:
        states.append(state)
        sucs = helper.get_successor(state)
        state, child_cost = sucs[action]
    # `pp` is assumed to be a module-level pprint.PrettyPrinter instance.
    pp.pprint(states)
Code example #6
def bfs(use_custom_heuristic):

    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    path_cost = 0
    action_list1 = []
    node = (init_state, action_list1)
    node_action = (node, action_list1)
    frontier = []
    frontier.append((node))
    explored = []
    action_list100 = []

    while (len(frontier) != 0):
        current_state, action_list1 = frontier.pop(0)
        # print "current_state: ", current_state
        action_list = []
        state_dictionary = helper.get_successor(current_state)
        key = state_dictionary.keys()
        if (helper.is_goal_state(current_state)):
            # print "goal_state found = ", current_state
            print "goal found"
            action_list100 = action_list1
            break
        explored.append(current_state)
        for action in key:
            newaction = [action]
            action_list = action_list1 + newaction
            next_state = state_dictionary[action][0]

            if (state_dictionary[action][1] == -1):
                continue

            child = (next_state, action_list)
            k = len(explored)
            l = len(frontier)
            flag1 = 0
            flag2 = 0
            for i in range(l):
                if (frontier[i][0] == next_state):
                    flag1 = 1
                    break

            for j in range(k):
                if (explored[j] == next_state):
                    flag2 = 1
                    break

            if (flag1 == 1 or flag2 == 1):
                continue
            if (flag1 == 0 and flag2 == 0):
                frontier.append(child)

    return action_list100
Code example #7
def bfs(use_custom_heuristic, use_debug_mode):

    global debug_sequence
    if use_debug_mode:
        return debug_sequence

    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    counter = 0  #For breaking ties

    state_dictionary = helper.get_successor(init_state)
    if helper.is_goal_state(init_state):
        print "Found Goal"
        return action_list

    root = init_state

    frontier = deque()
    actions_frontier = deque()

    frontier.append(root)
    actions_frontier.append([])

    explored = set()
    counter += 1
    while True:
        if len(frontier) == 0:
            print "No path possible!"
            return action_list

        node = frontier.popleft()
        node_actions = actions_frontier.popleft()

        explored.add(str(node))
        sucs = helper.get_successor(node)

        for action in sucs:
            counter += 1
            actions = node_actions[:]
            child_node, child_cost = sucs[action]
            if ((str(child_node) not in explored) and
                (child_node
                 not in frontier)):  # and (child_node not in frontier):
                actions.append(action)
                if helper.is_goal_state(child_node):
                    print "Found goal! : " + str(child_node)
                    return actions

                frontier.append(child_node)
                actions_frontier.append(actions)

    return action_list
Code example #8
def bfs(use_custom_heuristic):
    '''
    Perform BFS to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions() 
    action_list = []

    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)
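    # `queue` is assumed to be Python 2's Queue module, imported at module
    # level as `import Queue as queue`.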
    L=queue.Queue(maxsize=0)
    L.put([["Mv"],init_state])
    visited=[]

    result=False

    while not L.empty():
        current=L.get()
        if current[1].x==goal_state.x and current[1].y==goal_state.y:
            action_list=current[0]
            action_list.pop(0)
            result=True
            break
        elif current[1].x<0 or current[1].y<0:
            #print('state out of bound')
            continue

        elif current[1] in visited:
            continue

        else:
            #print('New State')
            next_states=helper.get_successor(current[1])
            visited.append(current[1])
            for key,value in next_states.items():
                path_list=current[0]
                path_list=path_list+[key]
                L.put([path_list,value[0]])


    if not result:
        action_list=[]



    '''
    YOUR CODE HERE
    '''

    return action_list
Code example #9
def ucs(use_custom_heuristic):
    '''
    Perform UCS to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []

    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)
    visited = []
    q3 = []
    entry_count = 0
    if not helper.is_goal_state(init_state):
        visited.append(init_state)
        for key, values in state_dictionary.items():
            list_1 = []
            temp = []
            list_1.extend(key.split())
            temp.append(list_1)
            temp.append(values[0])
            heapq.heappush(q3, (values[1], entry_count, temp))
            entry_count += 1
        while q3:
            cur_node = heapq.heappop(q3)
            if cur_node[2][1].x < 0 or cur_node[2][1].y < 0:
                continue
            elif helper.is_goal_state(cur_node[2][1]):
                action_list = cur_node[2][0]
                break
            elif cur_node[2][1] in visited:
                continue
            else:
                temp = helper.get_successor(cur_node[2][1])
                visited.append(cur_node[2][1])
                for key, values in temp.items():
                    cost = 0
                    list_1 = []
                    temp = []
                    cost = values[1] + cur_node[0]
                    list_1.extend(cur_node[2][0])
                    list_1.extend(key.split())
                    temp.append(list_1)
                    temp.append(values[0])
                    heapq.heappush(q3, (cost, entry_count, temp))
                    entry_count += 1

    return action_list
Code example #10
def ucs(use_custom_heuristic):
    '''
    Perform UCS to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []

    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)
    '''
    YOUR CODE HERE
    '''
    # Run the search only if the initial state is not already at the goal.
    if init_state.x != goal_state.x or init_state.y != goal_state.y:

        visited = []
        queue = list()
        visited.append([init_state.x, init_state.y, init_state.orientation])

        for key in state_dictionary:
            l = list(key.split(","))
            queue.append(
                [l, state_dictionary[key][0], state_dictionary[key][1]])

        while (queue):
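            # Re-sorting the list by cumulative path cost (index 2) on every
            # iteration makes it behave like a priority queue (UCS ordering).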

            queue.sort(key=lambda ele: ele[2])
            node = queue.pop(0)
            if node[1].x < 0 or node[1].y < 0:
                continue
            else:
                if [node[1].x, node[1].y, node[1].orientation] not in visited:
                    if node[1].x == goal_state.x and node[1].y == goal_state.y:
                        action_list = action_list + node[0]
                        break
                    else:
                        visited.append(
                            [node[1].x, node[1].y, node[1].orientation])
                        successor = helper.get_successor(node[1])
                        # print("Move: ", node[0])
                        # print("Successor: ",successor)
                        for key in successor:
                            l1 = list(key.split(","))
                            val = successor[key][1]
                            queue.append([
                                node[0] + l1, successor[key][0], node[2] + val
                            ])
    return action_list
Code example #11
def bfs(use_custom_heuristic):
    '''
    Perform BFS to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []

    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)
    import Queue as queue
    visited = []
    q3 = queue.Queue()
    if not helper.is_goal_state(init_state):
        visited.append(init_state)
        for key, values in state_dictionary.items():
            list_1 = []
            temp = []
            list_1.extend(key.split())
            temp.append(list_1)
            temp.append(values[0])
            q3.put(temp)
        while not q3.empty():
            cur_node = q3.get()
            if cur_node[1].x < 0 or cur_node[1].y < 0:
                continue
            elif helper.is_goal_state(cur_node[1]):
                action_list = cur_node[0]
                break
            elif cur_node[1] in visited:
                continue
            else:
                temp = helper.get_successor(cur_node[1])
                visited.append(cur_node[1])
                for key, values in temp.items():
                    list_1 = []
                    temp = []
                    list_1.extend(cur_node[0])
                    list_1.extend(key.split())
                    temp.append(list_1)
                    temp.append(values[0])
                    q3.put(temp)
    return action_list
Code example #12
    def task1(self, trajectories_json_file):

        q_values = {}
        h = problem.Helper()
        actions = h.get_all_actions()
        reward = 1
        with open(trajectories_json_file, 'r') as file:
            data = json.load(file)
        for index in data:
            q_values[index['state']] = {}
            for action in actions:
                q_values[index['state']][action] = 0
        while reward <= 1:
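            # Standard Q-learning update over the logged transitions:
            #   Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))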
            for i in range(len(data) - 1):
                q_values[data[i]['state']][
                    data[i]['action']] = q_values[data[i]['state']][
                        data[i]['action']] * (1 - self.alpha) + self.alpha * (
                            data[i]['reward'] + self.gamma *
                            max(q_values[data[i + 1]['state']].values()))
            reward += 1
        return q_values
Code example #13
    def __init__(self, plan_file, planner):
        """
        :param plan_file: Path to the file where plan is stored. This is generally stored at the path from where the planner was called.
        :type plan_file: str
        :param planner: Planner that was used to generate this plan. This is needed to parse the plan and convert to/store in a data structure in Python. Only possible values are FF and FD.
        :type planner: str 

        Attributes:
            **status_subscriber**: To subscribe to the "/status" topic, which gives the State of the robot.

            **actions_queue** (tuple(action_name, action_params)): Used to store the refined action tuples. You can decide what parameters you want to store for each action. Store the parameters that you need to pass to execute_<action_name>_action methods of RobotActionServer class.

            **action_index** (int): Stores the index of action that needs to be sent to RobotActionServer for execution.
        """
        rospy.init_node('listener', anonymous=True)
        self.status_subscriber = rospy.Subscriber('/status', String,
                                                  self.status_callback)
        self.helper = problem.Helper()
        self.action_index = 0
        self.actions_queue = self.refine_plan(plan_file, planner)
        self.execute_action()
        rospy.spin()
Code example #14
    def task1(self, trajectories_json_file):
        helper = problem.Helper()
        q_values = {}
        all_actions = helper.get_all_actions()
        # Load the recorded trajectory; `copy` is assumed to be imported at
        # module level (e.g. from copy import copy).
        with open(trajectories_json_file) as trajectory1:
            List1 = json.load(trajectory1)
        lastelem = copy(List1[-1])
        lastelem["reward"] = 0
        for entry in List1:
            action = entry["action"]
            state = entry["state"]
            reward = entry["reward"]
            temp = {}
            for action1 in all_actions:
                temp[action1] = 0
            q_values[state] = temp

        for (entry, next_entry) in zip(List1, List1[1:] + [lastelem]):
            action = entry["action"]
            state = entry["state"]
            reward = entry["reward"]
            next_state = next_entry["state"]
            temp = {}
            q_valuenext = {}
            q_valuenext = q_values[next_state]
            newstate = {}
            newstate = q_values[state]
            for action1 in all_actions:
                if action1 == action:
                    q = self.alpha * (reward + self.gamma *
                                      (max(q_valuenext.values())))
                    newstate[action1] = q
            q_values[state] = newstate
        print(json.dumps(q_values, sort_keys=True, indent=6))
        return q_values
Code example #15
    def execute_action(self):
        """
        This method picks an action from self.actions and send it to RobotActionServer for execution.

        :rtype: None
        
        """
        helper = problem.Helper()
        if (self.action_index >= len(self.actions_queue)):
            return
        action = self.actions_queue[self.action_index]
        self.action_index += 1
        print("Executing ", action[0])
        if action[0] == 'move':
            helper.execute_move_action(action[1])
        elif action[0] == 'pick':
            helper.execute_pick_action(action[1][0], action[1][1])
        elif action[0] == 'collect':
            helper.execute_collect_action(action[1][0], action[1][1])
        elif action[0] == 'serve':
            helper.execute_serve_action(action[1][0], action[1][1], action[1][2])
        elif action[0] == 'throw':
            helper.execute_throw_action(action[1][0], action[1][1], action[1][2])
        return
Code example #16
__docformat__ = 'reStructuredText'

import rospy
from std_msgs.msg import String
import problem
import json
import os
import argparse
import time
import heapq

import numpy as np
from robot import Robot
from problem import State

helper = problem.Helper()

object_dict = {}
bin_subject_name_dict = {}
current_occupied_locs = set()
age_dict = {}
unplaced_books = set()
priority_queue = []
current_state = ()
robot = Robot()


def getLatestObjectDictionary():
    global object_dict
    object_dict = helper.get_unplaced_books()
Code example #17
    def task2(self, episodes):
        helper = problem.Helper()
        init_state1 = helper.get_current_state()
        init_state = json.dumps(init_state1)
        all_actions = helper.get_all_actions()
        action_qpair = {}
        for action in all_actions:
            action_qpair[action] = 0
        state_dict = {}
        state_dict[init_state] = action_qpair
        state = init_state  # state is a string
        stated = init_state1  # stated is a dictionary
        reward_list = []
        iteration_list = []

        for i in range(episodes):
            state = init_state
            stated = init_state1
            reward_prev = 0
            count = 0
            step = 1
            while (count == 0):
                if (helper.is_terminal_state(stated)):
                    count = 1
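                # Epsilon-greedy action selection: epsilon decays linearly from
                # 0.7 towards a floor of 0.05 as episodes progress.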
                epsilon = max(0.05, 0.7 - (0.05 * (i + 1)))
                x = state_dict[state].items()
                state_action = sorted(x, key=lambda t: t[1])[-1][
                    0]  #The optimal action that the agent is supposed to perform in a given state
                random_action = random.choice(all_actions)
                if (random.random() < epsilon):
                    action_to_perform = random_action
                else:
                    action_to_perform = state_action  # action_to_perform is the full action with the objects
                parsed_action_1 = action_to_perform.split(" ")
                parsed_action = parsed_action_1[1:]
                param_cont = {}
                for l in parsed_action:
                    if l[:4] == "book":
                        param_cont["book_name"] = l
                    else:
                        param_cont["bin_name"] = l
                success, next_state1 = helper.execute_action(
                    parsed_action_1[0],
                    param_cont)  # next_state1 is a dictionary
                next_state = json.dumps(next_state1)  # next_state is a string
                state_list = state_dict.keys()

                label = 0
                for statename in state_list:
                    if statename == next_state:  # trying to check if the next_state already exists as an entry in statename
                        label = 1
                        break
                if label == 0:  # if next_state does not yet exist, initialize it with a zero q-value for each action
                    action_qpair = {}
                    for action in all_actions:
                        action_qpair[action] = 0
                    state_dict[next_state] = action_qpair
                reward = helper.get_reward(stated, parsed_action_1[0],
                                           next_state1)
                reward_cum = reward_prev + ((self.gamma**step) * reward
                                            )  # Calculating cumulative reward
                exploring_state_dict = state_dict[state]
                q_old = exploring_state_dict[action_to_perform]
                q = ((1 - self.alpha) * q_old) + (
                    self.alpha *
                    (reward +
                     self.gamma * max(state_dict[next_state].values()))
                )  # Calculating the q_value for being in a state and taking a specific action
                exploring_state_dict[
                    action_to_perform] = q  # Plugging the q-value into the action q value dictionary
                state_dict[state] = exploring_state_dict
                state = next_state  # state , next_state is a string
                stated = next_state1  # stated is a dictionary
                reward_prev = reward_cum
                step = step + 1
            print str(i) + "," + str(reward_cum)
            reward_list = reward_list + [reward_cum]
            iteration_list = iteration_list + [i]
            helper.reset_world()

        q_values = {}
        q_values = state_dict
        #plt.plot(iteration_list)
        #plt.ylabel(reward_list)
        #plt.show()
        return q_values
Code example #18
    def __init__(self):
        self.helper = problem.Helper()
Code example #19
    def task3(self, episodes, books):
        def prune_actionlist(state1, book):
            all_action2 = helper.get_all_actions()
            location_robot = [state1["robot"]["x"], state1["robot"]["y"]]
            # `book` is the parsed books.json dictionary passed in by the caller.
            objdict = book
            book_name = objdict["books"]
            book_1d = book_name["book_1"]
            book_1_load_location = book_1d["load_loc"]
            book_1_load_location_1 = book_1_load_location[0]
            book_1_load_location_2 = book_1_load_location[1]
            book_2d = book_name["book_2"]
            book_2_load_location = book_2d["load_loc"]
            book_2_load_location_1 = book_2_load_location[0]
            book_2_load_location_2 = book_2_load_location[1]
            bin_name = objdict["bins"]
            bin_1d = bin_name["trolly_1"]
            bin_1_load_location = bin_1d["load_loc"]
            bin_2d = bin_name["trolly_2"]
            bin_2_load_location = bin_2d["load_loc"]
            # If the robot is not at any book load location, prune all pick
            # actions. Rebuilding the list avoids mutating it while iterating.
            book_load_locs = book_1_load_location + book_2_load_location
            if location_robot not in book_load_locs:
                all_action2 = [
                    elem for elem in all_action2
                    if not (elem[:12] == "careful_pick"
                            or elem[:11] == "normal_pick")
                ]
            # If the robot is not at any bin (trolly) load location, prune all
            # place actions.
            bin_load_locs = bin_1_load_location + bin_2_load_location
            if location_robot not in bin_load_locs:
                all_action2 = [
                    elem for elem in all_action2
                    if not (elem[:13] == "careful_place"
                            or elem[:12] == "normal_place")
                ]

            pruned_action_list = all_action2
            return pruned_action_list

        helper = problem.Helper()
        init_state1 = helper.get_current_state()
        init_state = json.dumps(init_state1)
        all_actions = helper.get_all_actions()
        action_qpair = {}
        pruned_list = prune_actionlist(
            init_state1, books
        )  # pruning the action list with the function prune_actionlist()
        for action in pruned_list:
            action_qpair[action] = 0
        state_dict = {}
        state_dict[init_state] = action_qpair
        state = init_state  # state is a string
        stated = init_state1  # stated is a dictionary
        reward_list = []
        iteration_list = []

        for i in range(episodes):
            state = init_state
            stated = init_state1
            reward_prev = 0
            count = 0
            step = 1
            while (count == 0):
                if (helper.is_terminal_state(stated)):
                    count = 1
                epsilon = max(0.05, 0.7 - (0.05 * i))
                x = state_dict[state].items()
                state_action = sorted(x, key=lambda t: t[1])[-1][
                    0]  #The action that the agent is supposed to perform in a given state
                pruned_list1 = prune_actionlist(stated, books)
                random_action = random.choice(pruned_list1)
                if (random.random() < epsilon):
                    action_to_perform = random_action
                else:
                    action_to_perform = state_action  # action_to_perform is the full action with the objects
                parsed_action_1 = action_to_perform.split(" ")
                parsed_action = parsed_action_1[1:]
                param_cont = {}
                for l in parsed_action:
                    if l[:4] == "book":
                        param_cont["book_name"] = l
                    else:
                        param_cont["bin_name"] = l
                success, next_state1 = helper.execute_action(
                    parsed_action_1[0],
                    param_cont)  # next_state1 is a dictionary
                next_state = json.dumps(next_state1)  # next_state is a string
                state_list = state_dict.keys()

                label = 0
                for statename in state_list:
                    if statename == next_state:  # trying to check if the next_state already exists as an entry in statename
                        label = 1
                        break
                if label == 0:  # if next_state does not yet exist, initialize it with a zero q-value for each pruned action
                    action_qpair = {}
                    pruned_list2 = prune_actionlist(
                        next_state1, books
                    )  # pruning the action list with the function prune_actionlist()
                    for action in pruned_list2:
                        action_qpair[action] = 0
                    state_dict[next_state] = action_qpair
                reward = helper.get_reward(stated, parsed_action_1[0],
                                           next_state1)
                reward_cum = reward_prev + ((self.gamma**step) * reward
                                            )  # Calculating cumulative reward
                exploring_state_dict = state_dict[state]
                q_old = exploring_state_dict[action_to_perform]
                q = ((1 - self.alpha) * q_old) + self.alpha * (
                    reward + self.gamma * max(state_dict[next_state].values())
                )  # Calculating the q_value for being in a state and taking a specific action
                exploring_state_dict[
                    action_to_perform] = q  # Plugging the q-value into the action q value dictionary
                state_dict[state] = exploring_state_dict
                state = next_state  # state , next_state is a string
                stated = next_state1  # stated is a dictionary
                reward_prev = reward_cum
                step = step + 1
            print str(i) + "," + str(reward_cum)
            reward_list = reward_list + [reward_cum]
            iteration_list = iteration_list + [i]
            helper.reset_world()
        q_values = {}
        q_values = state_dict
        #plt.plot(iteration_list)
        #plt.ylabel(reward_list)
        #plt.show()
        return q_values
Code example #20
def astar(use_custom_heuristic):
    '''
    Perform A* search to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions() 
    action_list = []


    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)
    if use_custom_heuristic == True:

        def heuristic(state):
            a=goal_state.x - state.x + goal_state.y - state.y
            if state.orientation in ['WEST','SOUTH']:a=a+1
            else: a=a-1
            return a

        L_heap = []
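        # Heap entries are (f_value, tie_breaking_counter, action_list, state, g_cost).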
        heapq.heappush(L_heap, (heuristic(init_state),0, ["mv"], init_state,0))

        visited = []

        result = False
        counter=0
        while L_heap:
            current = heapq.heappop(L_heap)
            if current[3].x==goal_state.x and current[3].y==goal_state.y:


                action_list = current[2]
                action_list.pop(0)
                result = True
                break
            elif current[3].x < 0 or current[3].y < 0:
                #print('state out of bound')
                continue
            elif current[3] in visited:
                #print('Already Visited')
                continue

            else:
                #print('New State')
                next_states = helper.get_successor(current[3])
                visited.append(current[3])
                for key, value in next_states.items():
                    
                    path_list = current[2]
                    path_list = path_list + [key]
                    counter=counter+1
                    total_cost=current[4]+value[1]
                    # f = g + h: evaluate the heuristic on the successor state,
                    # not on its parent.
                    astar_val=heuristic(value[0])+total_cost

                    heapq.heappush(L_heap, (astar_val,counter, path_list, value[0],total_cost))

        if not result:

            action_list = []

    else:

        def heuristic(state):
            a=goal_state.x-state.x+goal_state.y-state.y
            return a

        L_heap = []
        heapq.heappush(L_heap, (heuristic(init_state),0, ["mv"], init_state,0))

        visited = []

        result = False
        counter = 0
        while L_heap:
            current = heapq.heappop(L_heap)
            if current[3].x==goal_state.x and current[3].y==goal_state.y:


                action_list = current[2]
                action_list.pop(0)
                result = True
                break
            elif current[3].x < 0 or current[3].y < 0:
                #print('state out of bound')
                continue
            elif current[3] in visited:
                # print('Already Visited')
                continue

            else:
                #print('New State')
                next_states = helper.get_successor(current[3])
                visited.append(current[3])
                for key, value in next_states.items():
                    counter=counter+1
                    path_list = current[2]
                    path_list = path_list + [key]
                    total_cost=current[4]+value[1]
                    # f = g + h: evaluate the heuristic on the successor state,
                    # not on its parent.
                    astar_val=heuristic(value[0])+total_cost

                    heapq.heappush(L_heap, (astar_val,counter, path_list, value[0],total_cost))

        if not result:

            action_list = []
    '''
    YOUR CODE HERE
    '''

    return action_list
Code example #21
class Refine:
    """
    This class has code to refine the high-level actions used by PDDL into the low-level actions used by the TurtleBot.

    """

    def __init__(self, plan_file, planner):
        """
        :param plan_file: Path to the file where plan is stored. This is generally stored at the path from where the planner was called.
        :type plan_file: str
        :param planner: Planner that was used to generate this plan. This is needed to parse the plan and convert to/store in a data structure in Python. Only possible values are FF and FD.
        :type planner: str 

        Attributes:
            **status_subscriber**: To subscribe to the "/status" topic, which gives the State of the robot.

            **actions_queue** (tuple(action_name, action_params)): Used to store the refined action tuples. You can decide what parameters you want to store for each action. Store the parameters that you need to pass to execute_<action_name>_action methods of RobotActionServer class.

            **action_index** (int): Stores the index of action that needs to be sent to RobotActionServer for execution.
        """

        rospy.init_node('listener', anonymous=True)
        self.status_subscriber = rospy.Subscriber('/status', String, self.status_callback)
        self.helper = problem.Helper()
        self.action_index = 0
        print plan_file
        self.actions_queue = self.refine_plan(plan_file, planner)
        self.execute_action()
        rospy.spin()

    def status_callback(self, data):
        """
        Callback function for subscriber to the `/status` rostopic
        
        """
        if(data.data == "Idle"):
            self.execute_action()


    def execute_action(self):
        """
        This method picks an action from self.actions and send it to RobotActionServer for execution.

        :rtype: None
        
        """
        helper = problem.Helper()
        if (self.action_index >= len(self.actions_queue)):
            return
        action = self.actions_queue[self.action_index]
        self.action_index += 1
        print("Executing ", action[0])
        if action[0] == 'move':
            helper.execute_move_action(action[1])
        elif action[0] == 'pick':
            helper.execute_pick_action(action[1][0], action[1][1])
        elif action[0] == 'collect':
            helper.execute_collect_action(action[1][0], action[1][1])
        elif action[0] == 'serve':
            helper.execute_serve_action(action[1][0], action[1][1], action[1][2])
        elif action[0] == 'throw':
            helper.execute_throw_action(action[1][0], action[1][1], action[1][2])
        return

        # Your code here


    def refine_plan(self, plan_file, planner):
        """
        Perform downward refinement to convert the high level plan into a low level executable plan.
        
        :param plan_file: Path to the file where plan is stored. This is generally stored at the path from where the planner was called.
        :type plan_file: str
        :param planner: Planner that was used to generate this plan. This is needed to parse the plan and convert to/store in a data structure in Python. Only possible values are FF and FD.
        :type planner: str 

        :returns: List of refined action tuples that will be added to the execution queue.
        :rtype: list(tuples)

        .. note::

            .. hlist::
                :columns: 1
                
                * Parse the plan from plan_file. This has to be according to the planner you are using.
                * use get_path(current_state, load_locations) to refine Move action.
                * Subject of the book and the bin do not match.
                * Robot location is not within the load locations of the bin, i.e. the robot is not in the vicinity of the bin.


        """
        def convert_time(time_name):
            # time_name is expected to be the textual token taken from the
            # plan file (e.g. "one" .. "four").
            if time_name == "one":
                time = 1
            elif time_name == "two":
                time = 2
            elif time_name == "three":
                time = 3
            else:
                time = 4
            return time
        objects_dict = self.parse_object_data()
        helper = problem.Helper()
        current_state = helper.get_initial_state()
        action_list = []
        with open(plan_file) as f:
            plan_sequence = f.readlines()[:-1]
            for action_info in plan_sequence:
                action_info_list = action_info[1:-2].split()
                action = action_info_list[0]
                if action == 'pick':
                    bowl_name = action_info_list[1]
                    time_name = action_info_list[4]
                    time = convert_time (time_name)
                    action_params = (bowl_name, current_state)
                    action_list.append((action, action_params))
                elif action == 'collect':
                    bowl_name = action_info_list[1]
                    action_params = (bowl_name, current_state)
                    action_list.append((action, action_params))
                elif action == 'serve':
                    bowl_name = action_info_list[1]
                    time_name = action_info_list[5]
                    table_name = action_info_list[-2]
                    action_params = (bowl_name, table_name, current_state)
                    action_list.append((action, action_params))
                elif action == 'throw':
                    bowl_name = action_info_list[1]
                    dustbin = action_info_list[-2]
                    action_params = (bowl_name, dustbin, current_state)
                    action_list.append((action, action_params))
                elif action == 'move':
                    if "bowl" in action_info_list[-1][:-5]:
                        destination_locations = objects_dict.get('kitchen_table').get('load_loc')
                    else:
                        destination_locations = objects_dict.get(action_info_list[-1][:-5]).get('load_loc')
                    print destination_locations
                    moves_list, destination_state, goal_reached = self.get_path(current_state, destination_locations)
                    if not goal_reached:
                        return []
                    action_list.append((action, moves_list))
                    current_state = destination_state
        print(action_list)
        return action_list
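
A minimal, hypothetical driver for the Refine class above. The plan path and the command-line flags are placeholders (assumptions, not part of the original code); the class itself blocks in rospy.spin() once constructed.

# Hypothetical entry point; assumes the planner has already written a plan
# to the given path and that Refine is defined in this module.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--plan_file', default='plan.ipc',
                        help='Path to the plan produced by the planner.')
    parser.add_argument('--planner', default='FF', choices=['FF', 'FD'])
    args = parser.parse_args()
    # Constructing Refine starts execution and blocks in rospy.spin().
    Refine(args.plan_file, args.planner)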
Code example #22
def ucs(use_custom_heuristic, use_debug_mode):
    '''
    Perform UCS to find sequence of actions from initial state to goal state
    '''

    global debug_mode
    global debug_sequence
    if use_debug_mode:
        return debug_sequence

    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    counter = 0  #For breaking ties

    # to get the possible action->(state,cost) mapping from current state
    if helper.is_goal_state(init_state):
        print "Found Goal"
        return action_list
    '''
    Use priority queue to keep track of path and Path Cost
    Each item in the frontier is of the form [cumulative_cost, counter, [action1, action2, ..., actionN], last_node]
    '''
    root = init_state
    frontier = []
    explored = set()

    item = [0.0, counter, [], root]
    heapq.heappush(frontier, item)
    counter += 1

    while True:
        if len(frontier) == 0:
            print "No path possible!"
            return action_list

        item = heapq.heappop(frontier)
        node = item.pop()
        node_actions = item.pop()
        node_cum_cost = item.pop(0)
        if helper.is_goal_state(node):
            return node_actions

        explored.add(str(node))
        sucs = helper.get_successor(node)

        for action in sucs:
            counter += 1
            actions = node_actions[:]
            child_node, child_cost = sucs[action]
            child_cost += node_cum_cost

            if (str(child_node)
                    not in explored):  # and (child_node not in frontier):
                # findnode is assumed to be a helper defined elsewhere that
                # returns the index of child_node in the frontier, or -1.
                index = findnode(frontier, child_node)
                if index != -1:
                    if frontier[index][0] > child_cost:
                        del frontier[index]
                        # Deleting an arbitrary entry breaks the heap
                        # invariant; restore it before pushing again.
                        heapq.heapify(frontier)
                actions.append(action)
                item = [child_cost, counter, actions, child_node]
                heapq.heappush(frontier, item)

    return action_list
Code example #23
def astar(use_custom_heuristic, use_debug_mode):
    '''
    Perform A* search to find sequence of actions from initial state to goal state
    '''
    global debug_mode
    global debug_sequence
    if use_debug_mode:
        return debug_sequence

    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []
    counter = 0

    if helper.is_goal_state(init_state):
        print "Found Goal"
        return action_list
    '''
    Use priority queue to keep track of path and Path Cost
    '''
    root = init_state
    frontier = []
    explored = set()

    if use_custom_heuristic:
        heuristic = custom_heuristic2(root, goal_state)
    else:
        heuristic = manhattan(root, goal_state)

    node_cum_cost = 0.0
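    # Fringe entries are [f_value, tie_breaking_counter, g_cost, action_list, state].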
    item = [heuristic, counter, node_cum_cost, [], root]
    heapq.heappush(frontier, item)
    counter += 1

    while True:
        if len(frontier) == 0:
            print "No path possible!"
            return action_list

        item = heapq.heappop(frontier)
        node = item.pop()
        node_actions = item.pop()
        node_cum_cost = item.pop()
        if helper.is_goal_state(node):
            return node_actions

        explored.add(str(node))
        sucs = helper.get_successor(node)
        for action in sucs:
            counter += 1
            actions = node_actions[:]
            child_node, child_cost = sucs[action]
            child_cost += node_cum_cost

            if use_custom_heuristic:
                child_heuristic = custom_heuristic2(child_node, goal_state)
            else:
                child_heuristic = manhattan(child_node, goal_state)

            if (str(child_node) not in explored):
                actions.append(action)
                total_cost = child_heuristic + child_cost
                item = [total_cost, counter, child_cost, actions, child_node]
                heapq.heappush(frontier, item)

    return action_list
Code example #24
def astar(use_custom_heuristic):
    '''
    Perform A* search to find sequence of actions from initial state to goal state
    '''
    helper = problem.Helper()
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    possible_actions = helper.get_actions()
    action_list = []

    # to get the possible action->(state,cost) mapping from current state
    state_dictionary = helper.get_successor(init_state)
    '''
    YOUR CODE HERE
    '''
    if use_custom_heuristic:
        # Run the search only if the initial state is not already at the goal.
        if init_state.x != goal_state.x or init_state.y != goal_state.y:

            visited = []
            init_dir = []
            init_cost = 0
            queue = list()
            # heuristic = 0.5
            # start_to_goal_man_dis = abs(init_state.x - goal_state.x) + abs(init_state.y - goal_state.y)
            visited.append(
                [init_state.x, init_state.y, init_state.orientation])

            array = astar_custom_heuristic(state_dictionary, goal_state,
                                           init_dir, init_cost, init_state,
                                           visited)
            queue = queue + array

            while (queue):

                queue.sort(key=lambda ele: ele[3])
                node = queue.pop(0)
                if node[1].x < 0 or node[1].y < 0:
                    continue
                else:
                    if [node[1].x, node[1].y,
                            node[1].orientation] not in visited:
                        if node[1].x == goal_state.x and node[
                                1].y == goal_state.y:
                            action_list = action_list + node[0]
                            break
                        else:
                            visited.append(
                                [node[1].x, node[1].y, node[1].orientation])
                            successor = helper.get_successor(node[1])
                            out_arr = astar_custom_heuristic(
                                successor, goal_state, node[0], node[2],
                                init_state, visited)
                            queue = queue + out_arr

        return action_list

    else:
        # Run the search only if the initial state is not already at the goal.
        if init_state.x != goal_state.x or init_state.y != goal_state.y:

            visited = []
            init_dir = []
            init_cost = 0
            queue = list()
            visited.append(
                [init_state.x, init_state.y, init_state.orientation])
            array = astar_manhattan_heuristic(state_dictionary, goal_state,
                                              init_dir, init_cost, visited)
            queue = queue + array

            while (queue):

                queue.sort(key=lambda ele: ele[3])
                node = queue.pop(0)
                if node[1].x < 0 or node[1].y < 0:
                    continue
                else:
                    if [node[1].x, node[1].y,
                            node[1].orientation] not in visited:
                        if node[1].x == goal_state.x and node[
                                1].y == goal_state.y:
                            action_list = action_list + node[0]
                            break
                        else:
                            visited.append(
                                [node[1].x, node[1].y, node[1].orientation])
                            successor = helper.get_successor(node[1])
                            out_arr = astar_manhattan_heuristic(
                                successor, goal_state, node[0], node[2],
                                visited)
                            queue = queue + out_arr

        return action_list
Code example #25
def search(algorithm, time_limit):
    """
        Performs a search using the specified algorithm.
        
        Parameters
        ===========
            algorithm: str
                The algorithm to be used for searching.
            time_limit: int
                The time limit in seconds to run this method for.
                
        Returns
        ========
            tuple(list, int)
                A tuple of the action_list and the total number of nodes
                expanded.
    """
    
    # The helper allows us to access the problem functions.
    helper = problem.Helper()
    
    # Get the initial and the goal states.
    init_state = helper.get_initial_state()
    goal_state = helper.get_goal_state()
    
    # Initialize the g_score map, we use this map to keep a track of the
    # shortest path to a state that we have observed.
    #
    # This map stores data of the form state -> int.
    g_score = {}
    g_score[init_state] = 0

    # Initialize a map to store the best path to a state.
    #
    # This map stores data of the form state -> Node.
    best_path = {}
    best_path[init_state] = None
    
    # Initialize the init node of the search tree and compute its f_score.
    init_node = Node(init_state, None, 0, None, 0)
    f_score = compute_g(algorithm, init_node, goal_state) \
        + compute_h(algorithm, init_node, goal_state)
    
    # Initialize the fringe as a priority queue.
    priority_queue = PriorityQueue()
    priority_queue.push(f_score, init_node)
    
    action_list = []
    total_nodes_expanded = 0
    # Keep the caller-supplied time_limit for error reporting and compare
    # against an absolute deadline while searching.
    deadline = time.time() + time_limit
    while not priority_queue.is_empty() and time.time() < deadline:
    
        # Pop the node with the smallest f_score from the fringe.
        current_node = priority_queue.pop()
        current_state = current_node.get_state()
        
        # If the current state is a goal state, we are done and need to
        # reconstruct the best path to the goal.
        if helper.is_goal_state(current_state):
        
            action_list = build_solution(best_path, current_node)
            break

        # Expand the node.
        total_nodes_expanded += 1
        action_state_dict = helper.get_successor(current_state)
        for action, state in action_state_dict.items():

            # The successor function actually stores items in the
            # action -> (next_state, cost) format.
            next_state, action_cost = state
            
            # Only process valid states.
            if is_invalid(next_state):
            
                continue
            
            next_node = Node(next_state,
                current_node,
                current_node.get_depth() + 1,
                action,
                action_cost)
                
            g_value = compute_g(algorithm, next_node, goal_state)
            
            # If a cheaper path is found to the next_state, then we can
            # add the next_state to the fringe.
            if g_value < g_score.get(next_state, float("inf")):
            
                # Remember the cheaper path to the next_state from the
                # current_state/current_node.
                #
                # We need the node since we want to remember the action as well.
                best_path[next_state] = current_node
            
                # Remember the cheaper path cost.
                g_score[next_state] = g_value
                
                # Compute the f-value and add to the fringe (unless it is
                # already there).
                f_score = g_value + compute_h(algorithm, next_node, goal_state)
                if not priority_queue.contains(next_state):
                
                    priority_queue.push(f_score, next_node)

    if time.time() >= deadline:

        raise SearchTimeOutError("Search timed out after %u secs." % (time_limit))

    return action_list, total_nodes_expanded
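
A minimal, hypothetical usage sketch for the search function above. It assumes the surrounding module also defines the Node, PriorityQueue, compute_g/compute_h helpers, and SearchTimeOutError that the function references; the algorithm name "astar" and the 300-second limit are placeholder values, not taken from the original code.

# Hypothetical driver for search(); algorithm name and time limit are placeholders.
if __name__ == '__main__':
    try:
        actions, expanded = search("astar", 300)
        print "Plan: %s (%d nodes expanded)" % (actions, expanded)
    except SearchTimeOutError as e:
        print e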