Example 1
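A task1 stub that probes the environment: it prints the available actions and issues a single moveF action for robot1.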
    def task1(self, trajectories_json_file):
        q_values = {}
        # Your code here
        print(self.helper.get_all_actions())
        #print(self.helper.get_all_actions(2))
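        # the third argument selects the robot; elsewhere in these examples
        # execute_action is also called with just (action_name, params_dict)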
        api.execute_action('moveF', {}, 'robot1')
        #self.helper.execute_action('moveF', {}, 'robot2')

        return q_values
Example 2
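Runs a list of actions in sequence, remapping the CLEAN action onto the API's clean call with the id of the dirt at the bot's location, and returns the final state.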
    def raw_execute_action(self, list_of_actions_to_execute, bot_location):
        next_state = None  # returned unchanged if the action list is empty
        for action in list_of_actions_to_execute:
            action_name = action
            action_params = {}
            # the CLEAN action maps onto the API's clean call and needs the
            # id of the dirt at the bot's current location
            if action == "CLEAN":
                action_name = "clean"
                action_params["dirt_id"] = str(
                    self.dirt_location_to_id_mapping[bot_location])
            success, next_state = api.execute_action(action_name, action_params)
        return next_state
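A hypothetical invocation; the surrounding class instance (here `bot`) and its dirt_location_to_id_mapping are not shown in the snippet, so both are assumptions:

    # hypothetical usage sketch; `bot` and the dirt mapping are assumed
    final_state = bot.raw_execute_action(["moveF", "CLEAN"], bot_location=(2, 3))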
Example 3
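A random walk: each iteration prints the possible actions with their outcome distributions and rewards, then executes one uniformly random action, repeating until a terminal state is reached.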
    def random_walk(self):
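        """Execute random valid actions until a terminal state is reached,
        printing each state's possible actions, outcomes, and rewards."""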

        print("Robot can perform following actions: {}".format(
            self.action_list))
        while True:

            if api.is_terminal_state(self.current_state):
                print "Goal Reached"
                break

            possible_actions = api.get_possible_actions(self.current_state)
            print("Possible actions in current state: {}".format(
                possible_actions))

            for action in possible_actions:
                print "Action {}".format(action)
                if action == "pick":  # try to pick book 1
                    action_params = {"book_name": "book_1"}
                elif action == "place":
                    action_params = {
                        "book_name": "book_1",
                        "bin_name": "trolly_2"
                    }
                else:
                    action_params = {}

                states = api.get_possible_states(self.current_state, action,
                                                 action_params)
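                # each value in states holds a (next_state, probability) pair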
                print "Possible states are:"
                for state in states:
                    next_state = states[state][0]
                    probability = states[state][1]
                    print state
                    print "State: ", next_state
                    print "Probability: ", probability
                    print "Reward: ", api.get_reward(self.current_state,
                                                     action, next_state)
                    print ""

            chosen_action = random.choice(possible_actions)
            if chosen_action == "pick":  # try to pick book 1
                action_params = {"book_name": "book_1"}
            elif chosen_action == "place":
                action_params = {"book_name": "book_1", "bin_name": "trolly_2"}
            else:
                action_params = {}

            print "Executing action: {} with params: {}".format(
                chosen_action, action_params)

            success, next_state = api.execute_action(chosen_action,
                                                     action_params)
            if success == 1:
                print "Successfully executed"
            else:
                print "Action failed"

            self.current_state = next_state
            print "updated current state:"
            print self.current_state

            raw_input("\nPress Enter to continue execution...")
Example 4
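A command-line dispatcher that maps subcommand names onto the corresponding environment API calls.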
if __name__ == "__main__":
    args = parser.parse_args()

    if args.command == 'get_current_state':
        print(api.get_current_state())
    elif args.command == 'is_terminal_state':
        current_state = api.get_current_state()
        print(api.is_terminal_state(current_state))
    elif args.command == 'reset_world':
        print(api.reset_world())
    elif args.command == 'get_all_actions':
        print(api.get_all_actions())
    elif args.command == 'get_possible_actions':
        current_state = api.get_current_state()
        print(api.get_possible_actions(current_state))
    elif args.command == 'get_possible_states':
        current_state = api.get_current_state()
        for robot in current_state['robots']:
            for action in api.get_possible_actions(current_state, robot):
                print(
                    api.get_possible_states(current_state, action,
                                            {'robot': robot}))
    elif args.command == 'execute_action':
        current_state = api.get_current_state()
        success, next_state = api.execute_action(args.action, {})
        print(success)
        print(api.get_reward(current_state, args.action, next_state))
    elif args.command == 'get_path':
        print(api.get_path('robot0', (0.0, -2.0)))
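For reference, a minimal argparse setup this dispatcher assumes could look like the sketch below; only the `command` and `action` attributes are actually read above, so the descriptions and defaults are assumptions.

import argparse

# hypothetical parser definition; the original script's is not shown
parser = argparse.ArgumentParser(
    description='Inspect or step the environment through its API.')
parser.add_argument('command', help="API call to run, e.g. 'get_current_state'")
parser.add_argument('--action', default=None,
                    help="action name used by the 'execute_action' command")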
Example 5
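Parses plan lines into move, load, and deliver steps and executes them via api.get_path and api.execute_action.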
            action = line[line.index(':') + 1:].lower().split()
            if action[0] == 'move':
                robot = action[3]
                if 'house' in action[2]:
                    target = action[2].split('_')[1]
                    target = current_state['houses'][target]['deliver_loc']
                elif 'box' in action[2]:
                    target = action[2][:-5]
                    target = current_state['packages'][target]['load_loc'][0]
                else:
                    assert False

                actions = api.get_path(robot, target)

                for act in actions:
                    success, current_state = api.execute_action(act, {})
                    if success != 1:
                        print(act + ' failed!')
                        break

            elif action[0] == 'load':
                robot = action[4]
                success, current_state = api.execute_action(
                    robot + '-load', {})
                if success != 1:
                    print('load failed!')
                    break

            elif action[0] == 'deliver':
                robot = action[5]
                success, current_state = api.execute_action(
                    robot + '-deliver', {})
                if success != 1:
                    print('deliver failed!')
                    break
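Example 6
Visits the remaining cans and cups in random order, senses each object, picks up cans and carries them to the bin, and logs every pose and action to state_action.txt.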
    def path_generation(self):

        print("Robot can perform following actions: {}".format(
            self.action_list))
        #f=open(plan_file, "r")
        #contents =f.readlines()
        with open(ROOT_PATH + "/objects.json", 'r') as json_file:
            try:
                objects = json.load(json_file)
            except (ValueError, KeyError, TypeError):
                print "JSON error"

        #print objects
        cans_cups = objects["cans"]
        cans_cups.update(objects["cups"])
        from_location = State(0.0, 0.0, 'EAST')
        f = open(ROOT_PATH + '/state_action.txt', 'w+')
        init_state = (api.get_current_state()['robot']['x'],
                      api.get_current_state()['robot']['y'],
                      api.get_current_state()['robot']['orientation'])
        f.write("%s\t" % str(init_state))
        while cans_cups:
            # pick a random remaining can/cup and fetch its entry by key
            key = random.choice(list(cans_cups))
            choice = cans_cups[key]
            print(choice["load_loc"])
            actions, dummy_from_location, result = self.get_path(
                from_location, choice["load_loc"], choice["loc"])
            print(actions, dummy_from_location, result)
            for action in actions:
                action_params = {}
                #print "Executing actions:",action
                success, next_state = api.execute_action(action, action_params)
                if not success:
                    break
                f.write("%s \n" % action)
                init_state = (api.get_current_state()['robot']['x'],
                              api.get_current_state()['robot']['y'],
                              api.get_current_state()['robot']['orientation'])
                f.write("%s \t" % str(init_state))
            print "Successfully reached object:", key
            if result:
                del cans_cups[key]
                if "can" in key:
                    can_num = key.split("_")[1]
                    obj_name = "coke@can{}".format(can_num)
                else:
                    cup_num = key.split("_")[1]
                    obj_name = "plastic@cup{}".format(cup_num)
                success, next_state = api.execute_action(
                    'sense', {"obj_name": obj_name})
                if next_state["sees_can"]:
                    print("I saw a can!!!")
                    success, next_state = api.execute_action(
                        'pick', {"can_name": key})
                    f.write("pick_%s \t" % str(key))
                    print "I have executed pick action:", next_state
                    actions, dummy_from_location, result = self.get_path(
                        dummy_from_location,
                        objects["bins"]["bin"]["load_loc"],
                        objects["bins"]["bin"]["loc"])
                    print(actions, dummy_from_location, result)
                    for action in actions:
                        action_params = {}
                        #print "Executing actions:",action
                        success, next_state = api.execute_action(
                            action, action_params)
                        if not success:
                            break
                        f.write("%s \n" % action)
                        init_state = (
                            api.get_current_state()['robot']['x'],
                            api.get_current_state()['robot']['y'],
                            api.get_current_state()['robot']['orientation'])
                        f.write("%s \t" % str(init_state))
                    print "Successfully reached bin"
                    success, next_state = api.execute_action(
                        'place', {
                            "can_name": key,
                            "bin_name": "bin"
                        })
                    print "I have executed place action:", next_state
                    f.write("place_%s \t" % str(key))
                else:
                    print("I saw a cup!!!")
                from_location = dummy_from_location
        f.close()
        print "Successfully Completed"
Example 7
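Evaluation run for task5: loads two trained q tables and replays a single greedy episode (epsilon = 0) with an active and a passive turtlebot, accumulating rewards without learning.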
    def task5(self, episodes):
        '''for running the simulation after training'''
        q_values = {}
        # Your code here

        actions_json_file = '/action_config.json'

        with open(self.root_path + actions_json_file) as json_file:
            try:
                self.action_reference = json.load(json_file, parse_float=float)
            except (ValueError, KeyError, TypeError):
                print "JSON error"

        self.book_penalty = -100
        self.bump_penalty = -100

        # =============================================================================
        #         q tables loaded from training (task4)
        # =============================================================================
        # state indexing: (x, y, orientation, c1..c6, tbot_near, action);
        # ci is 0 while book i is available and 1 once it has been picked up

        q1 = np.load('q_table_r1_6.npy')
        q2 = np.load('q_table_r2_6.npy')
        # =============================================================================
        #       Create Agents
        # =============================================================================

        agent1_books = [1, 2, 3]
        agent2_books = [4, 5, 6]

        agent1 = Agent('robot1', q1, agent1_books, more_books=True)
        agent2 = Agent('robot2', q2, agent2_books, more_books=True)

        tbot_list = [agent1, agent2]

        R_cumulative = {agent1.name: [], agent2.name: []}

        # =============================================================================
        #       acting
        # =============================================================================

        tbot_active = agent1
        tbot_passive = agent2
        epsilon = 0
        # a single episode
        api.reset_world()
        R_cumulative_active = 0
        R_cumulative_passive = 0
        initial_state = api.get_current_state()
        current_state = initial_state

        state_active = tbot_active.dict_to_np_state(
            current_state, tbot_passive)  # active bot's state tuple
        state_passive = tbot_passive.dict_to_np_state(
            current_state, tbot_active)  # passive bot's state tuple
        #pdb.set_trace()

        while not api.is_terminal_state(current_state):

            through_api = True  # flag for going through API for an action, if False, then reward is given manually

            # =============================================================================
            #               active tbot acts and learns
            # =============================================================================
            #pick action for tbot_active
            #choose either random or exploit, according to epsilon=epsilon_calc(epsilon_initial, epsilon_decay, i)
            # =============================================================================
            #                 if state_active[0]>=5 or state_passive[0]>=5:
            #                     pdb.set_trace()
            # =============================================================================
            action_A, action_items_A, action_params_A, action_string_A = self.choose_action(
                tbot_active, epsilon, state_active)  #selects action

            through_api, next_state, reward = self.reward_prune(
                current_state, state_active, state_passive, action_A,
                action_items_A, tbot_active, tbot_passive
            )  #prunes by checking for invalid actions, in which case we don't run through environment_api
            #pdb.set_trace()
            if through_api:
                success, next_state = api.execute_action(
                    action_A, action_params_A, tbot_active.name)
                reward = api.get_reward(current_state, action_A, next_state)

            R_cumulative_active += reward

            next_state_active = tbot_active.dict_to_np_state(
                next_state, tbot_passive)

            # no q update here: task5 only evaluates the trained tables, so
            # this state-action index is computed but left unused
            state_action_idx = tuple(state_active) + tuple(
                [tbot_active.idx_to_action.index(action_string_A)])

            current_state = next_state  # update current state so the other tbot sees the new state before choosing its action

            state_active = tbot_active.dict_to_np_state(
                current_state, tbot_passive)  # active bot's state tuple
            state_passive = tbot_passive.dict_to_np_state(
                current_state, tbot_active)  # passive bot's state tuple

            # =============================================================================
            #               passive tbot acts and does NOT learn
            # =============================================================================
            action_P, action_items_P, action_params_P, action_string_P = self.choose_action(
                tbot_passive, epsilon, state_passive)

            through_api, next_state, reward = self.reward_prune(
                current_state, state_passive, state_active, action_P,
                action_items_P, tbot_passive,
                tbot_active)  #reward won't be used

            if through_api:
                success, next_state = api.execute_action(
                    action_P, action_params_P, tbot_passive.name)
                reward = api.get_reward(current_state, action_P, next_state)

            R_cumulative_passive += reward
            current_state = next_state  # update current state for the active tbot

            state_active = tbot_active.dict_to_np_state(
                current_state, tbot_passive)  # active bot's state tuple
            state_passive = tbot_passive.dict_to_np_state(
                current_state, tbot_active)  # passive bot's state tuple

        R_cumulative[tbot_active.name].append(R_cumulative_active)
        R_cumulative[tbot_passive.name].append(R_cumulative_passive)
        print(R_cumulative)
Example 8
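Training run for task4: alternates which turtlebot learns across episode blocks, applies a tabular Q-learning update to the active bot only, and finally saves the q tables and pickled reward histories.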
    def task4(self, episodes):
        ''' train with 3 books for each tbot'''
        #pdb.set_trace()
        q_values = {}

        actions_json_file = '/action_config.json'

        with open(self.root_path + actions_json_file) as json_file:
            try:
                self.action_reference = json.load(json_file, parse_float=float)
            except (ValueError, KeyError, TypeError):
                print "JSON error"

        # =============================================================================
        #       episode parameters
        # =============================================================================
        # there are actually 2*episodes episodes in total, since there are two tbots
        episode_update = 2  # episodes a tbot trains while the other tbot's policy stays fixed; must divide episodes
        episode_blocks = int(
            episodes / episode_update
        )  # in each episode block one tbot updates its q table while the other only acts
        # =============================================================================
        #       epsilon parameters & set penalty values
        # =============================================================================
        epsilon_initial = .95
        epsilon_decay = .002
        epsilon_calc = lambda epsilon_initial, epsilon_decay, i: max(
            0.05, epsilon_initial - epsilon_decay * i)
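        # with these settings epsilon decays linearly from 0.95 and reaches
        # the 0.05 floor after (0.95 - 0.05) / 0.002 = 450 episodes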

        self.book_penalty = -100
        self.bump_penalty = -100

        # =============================================================================
        #         q tables initialized to zero
        # =============================================================================
        q1 = np.zeros(
            (7, 7, 4, 2, 2, 2, 2, 2, 2, 5, 9)
        )  # (x, y, orientation, c1..c6, tbot_near, action); ci is 0 while book i is available and 1 once picked up
        q2 = np.zeros((7, 7, 4, 2, 2, 2, 2, 2, 2, 5, 9))

        # =============================================================================
        #       Create Agents
        # =============================================================================

        agent1_books = [1, 2, 3]
        agent2_books = [4, 5, 6]

        agent1 = Agent('robot1', q1, agent1_books, more_books=True)
        agent2 = Agent('robot2', q2, agent2_books, more_books=True)

        tbot_list = [agent1, agent2]

        R_cumulative = {agent1.name: [], agent2.name: []}

        # =============================================================================
        #       acting and training
        # =============================================================================

        for i in range(episode_blocks):
            epsilon = epsilon_calc(
                epsilon_initial, epsilon_decay,
                i)  # block-level epsilon; recomputed per episode below
            for tbot in tbot_list:  # the active tbot updates its q table, the passive one does not
                tbot_active = tbot
                tbot_passive_set = set(tbot_list) - set([tbot])
                tbot_passive = tbot_passive_set.pop()
                for e in range(
                        episode_update
                ):  #cycle through the episodes inside an episode block
                    epsilon = epsilon_calc(epsilon_initial, epsilon_decay,
                                           i * episode_update + e)
                    # a single episode
                    api.reset_world()
                    R_cumulative_active = 0
                    R_cumulative_passive = 0
                    initial_state = api.get_current_state()
                    current_state = initial_state

                    state_active = tbot_active.dict_to_np_state(
                        current_state, tbot_passive)  # active bot's state tuple
                    state_passive = tbot_passive.dict_to_np_state(
                        current_state, tbot_active)  # passive bot's state tuple
                    #pdb.set_trace()
                    print('episode_block {0} episode {1} for tbot {2}'.format(
                        i, e, tbot_active.name))
                    while not api.is_terminal_state(current_state):

                        through_api = True  # flag for going through API for an action, if False, then reward is given manually

                        # =============================================================================
                        #               active tbot acts and learns
                        # =============================================================================
                        #pick action for tbot_active
                        #choose either random or exploit, according to epsilon=epsilon_calc(epsilon_initial, epsilon_decay, i)
                        # =============================================================================
                        #                 if state_active[0]>=5 or state_passive[0]>=5:
                        #                     pdb.set_trace()
                        # =============================================================================
                        action_A, action_items_A, action_params_A, action_string_A = self.choose_action(
                            tbot_active, epsilon,
                            state_active)  #selects action

                        through_api, next_state, reward = self.reward_prune(
                            current_state, state_active, state_passive,
                            action_A, action_items_A, tbot_active, tbot_passive
                        )  #prunes by checking for invalid actions, in which case we don't run through environment_api
                        #pdb.set_trace()
                        if through_api:
                            success, next_state = api.execute_action(
                                action_A, action_params_A, tbot_active.name)
                            reward = api.get_reward(current_state, action_A,
                                                    next_state)

                        R_cumulative_active += reward

                        next_state_active = tbot_active.dict_to_np_state(
                            next_state, tbot_passive)

                        #update q_values of tbot_active ONLY

                        state_action_idx = tuple(state_active) + tuple(
                            [tbot_active.idx_to_action.index(action_string_A)])
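                        # tabular Q-learning update:
                        # Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(reward + gamma*max_a' Q(s',a'))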
                        tbot_active.q[state_action_idx] = (
                            1 - self.alpha
                        ) * tbot_active.q[state_action_idx] + self.alpha * (
                            reward +
                            self.gamma * max(tbot_active.q[next_state_active]))

                        current_state = next_state  # update current state so the other tbot sees the new state before choosing its action

                        state_active = tbot_active.dict_to_np_state(
                            current_state,
                            tbot_passive)  # active bot's state tuple
                        state_passive = tbot_passive.dict_to_np_state(
                            current_state,
                            tbot_active)  # passive bot's state tuple

                        # =============================================================================
                        #               passive tbot acts and does NOT learn
                        # =============================================================================
                        action_P, action_items_P, action_params_P, action_string_P = self.choose_action(
                            tbot_passive, epsilon, state_passive)

                        through_api, next_state, reward = self.reward_prune(
                            current_state, state_passive, state_active,
                            action_P, action_items_P, tbot_passive,
                            tbot_active)  #reward won't be used

                        if through_api:
                            success, next_state = api.execute_action(
                                action_P, action_params_P, tbot_passive.name)
                            reward = api.get_reward(current_state, action_P,
                                                    next_state)

                        R_cumulative_passive += reward
                        current_state = next_state  # update current state for the active tbot

                        state_active = tbot_active.dict_to_np_state(
                            current_state,
                            tbot_passive)  # active bot's state tuple
                        state_passive = tbot_passive.dict_to_np_state(
                            current_state,
                            tbot_active)  # passive bot's state tuple

                    R_cumulative[tbot_active.name].append(R_cumulative_active)
                    R_cumulative[tbot_passive.name].append(
                        R_cumulative_passive)
                    print(R_cumulative)
                    print('epsilon on episode {0} is {1}'.format(
                        i * episode_update + e, epsilon))
        np.save('q_table_r1_6.npy', agent1.q)
        np.save('q_table_r2_6.npy', agent2.q)

        import pickle
        with open("robot1_rewards_6.txt", "wb") as f:
            pickle.dump(R_cumulative['robot1'], f)

        with open("robot2_rewards_6.txt", "wb") as f:
            pickle.dump(R_cumulative['robot2'], f)