Example #1
def t3(episodes):
    # print("Inside task_3")
    global obj_dict
    global table_turtlebot
    epsilon = 0.9
    epsilon_discount = 0.99
    start_time = time.time()
    # total_episodes = 2
    highest_reward = 0
    last_time_steps = np.ndarray(0)
    # root_path="home/deepika/catkin_ws/src/reinforcement"
    # filename_1=root_path+"/books.json"
    # print(filename_1)
    # with open(filename_1) as file:
    #    obj_dict = json.load(file)
    state_init = problem.get_current_state()

    table_turtlebot = Table_turtlebot(state_init)

    for x in range(episodes):
        # print("Inside x")
        done = False
        cumulated_reward = 0
        state_init = problem.get_current_state()
        # print("Initial state is ",state_init)
        # print("data type",type(state_init))
        # table_turtlebot= Table_turtlebot(state_init)
        if epsilon > 0.05:
            epsilon *= epsilon_discount
        for i in range(20000):
            # print("Inside i")
            # Pick an action based on the current state
            action = chooseAction_taskl(state_init, epsilon)
            # print("Action in x :",action)
            # Execute the action and get feedback
            observation, next_state, reward = action_to_execute_func(action)
            # print("Observation:",observation)
            # print("Next state is",next_state)
            # print("reward is:",reward)
            cumulated_reward += reward
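            # update() presumably applies the Q-value update for the visited
            # (state, action) pair using the observed reward and next state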
            update(state_init, action, reward, next_state)
            # term=problem.is_terminal_state()
            # print("term:",term)
            if problem.is_terminal_state() == 0:
                state_init = next_state
            else:
                last_time_steps = np.append(last_time_steps, [int(i + 1)])
                # print("last time steps:",last_time_steps)
                # print("Terminal state is reached in",i)
                break
        print("Episode ", x + 1, "Reward :", cumulated_reward)
        problem.reset_world()
Example #2
def t2(total_episodes):
    # print("Inside task_2")
    epsilon = 0.9
    epsilon_discount = 0.99
    start_time = time.time()
    # total_episodes = 2
    highest_reward = 0
    last_time_steps = np.ndarray(0)

    for x in range(total_episodes):
        # print("Inside x")
        done = False
        cumulated_reward = 0
        state_init = problem.get_current_state()
        # print("Initial state is ",state_init)
        # print("data type",type(state_init))
        if epsilon > 0.05:
            epsilon *= epsilon_discount
        for i in range(70000):
            # print("Inside i")
            # Pick an action based on the current state
            action = chooseAction(state_init, epsilon)
            # print("Action in x :",action)
            # Execute the action and get feedback
            observation, next_state, reward = action_to_execute_func(action)
            # print("Next state is",next_state)
            # print("reward is:",reward)
            cumulated_reward += reward

            update(state_init, action, reward, next_state)
            term = problem.is_terminal_state()
            # print("term:",term)
            if term == 0:
                state_init = next_state
            else:
                last_time_steps = np.append(last_time_steps, [int(i + 1)])
                # print("last time steps:",last_time_steps)
                # print("Terminal state is reached in",i)
                break
        print("Episode ", x + 1, "Reward :", cumulated_reward)
        problem.reset_world()
Example #3
def q_learning_task3(num_episodes):
    action_list = problem.get_all_actions()
    epsilon = 0.99
    alpha = 0.3
    gamma = 0.9
    number_of_actions = len(action_list)
    #print action_list
    q_table = [[0 for j in range(number_of_actions)] for i in range(1)]
    zero_list = [0 for i in range(number_of_actions)]
    outerloopcnt = 0
    book_loc_data = []
    trolley_loc_data = []
    new_action_list = []

    # parse books.json for the load locations of each book and each trolley
    with open('books.json') as book_parse:
        data = json.load(book_parse)

    loc_data = data["books"]
    #print len(loc_data)
    for i in range(0, len(loc_data)):
        book_name = "book_" + str(i + 1)
        book_loc_data.append(loc_data[book_name]["load_loc"])
    #print book_loc_data

    bin_data = data["bins"]
    #print len(bin_data)
    for i in range(0, len(bin_data)):
        trolley_name = "trolly_" + str(i + 1)
        trolley_loc_data.append(bin_data[trolley_name]["load_loc"])
    #print trolley_loc_data

    for i in range(num_episodes):
        current_location = problem.get_current_state()
        cumulative_reward = 0
        state_list = []
        state_list.append(current_location)
        count = 0
        if (outerloopcnt % 10 == 0):
            epsilon -= 0.1
        #Using Epsilon greedy approach
        # Run the episode till it reaches the terminal state

        while (problem.is_terminal_state() == 0):
            str_out = ""
            state_index = state_list.index(current_location)
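            # Extract the turtlebot's (x, y) position from the state string; it is
            # used below to prune pick/place actions in the random-action branch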
            index = current_location.find('turtlebot3_burger')
            bot_loc = []
            if (index != -1):
                bot_location = current_location[index:]
                loc_list = bot_location.split(',')
                x_loc = loc_list[1]
                y_loc = loc_list[2]
                bot_loc.append(float(x_loc))
                bot_loc.append(float(y_loc))

            rand = random.random()
            if (rand < epsilon):
                #selecting the action randomly
                # Filter the candidate actions instead of removing items while
                # indexing, which would skip the element after each removal.
                new_action_list = copy.deepcopy(action_list)
                if (bot_loc not in book_loc_data[0]):
                    new_action_list = [
                        a for a in new_action_list
                        if "careful_pick" not in a and "normal_pick" not in a
                    ]
                if (bot_loc in trolley_loc_data[0]):
                    new_action_list = [
                        a for a in new_action_list
                        if "normal_place" not in a and "careful_place" not in a
                    ]
                action = random.choice(new_action_list)
                action_index = action_list.index(action)
            else:
                #Taking the maximum valued action from the q_table for that state
                max_value = np.amax(q_table[state_index])
                action_index = q_table[state_index].index(max_value)
                action = action_list[action_index]
                #print bot_loc

            #Executing the action corresponding to the action selected
            if ('normal_moveF' in action):
                success, next_state, reward = problem.execute_normal_moveF()
            elif ('normal_TurnCW' in action):
                success, next_state, reward = problem.execute_normal_TurnCW()
            elif ('normal_TurnCCW' in action):
                success, next_state, reward = problem.execute_normal_TurnCCW()
            elif ('careful_moveF' in action):
                success, next_state, reward = problem.execute_careful_moveF()
            elif ('careful_TurnCW' in action):
                success, next_state, reward = problem.execute_careful_TurnCW()
            elif ('careful_TurnCCW' in action):
                success, next_state, reward = problem.execute_careful_TurnCCW()
            elif ('normal_place' in action):
                action = action.split(' ')
                book_name = action[1]
                bin_name = action[2][:-1]
                success, next_state, reward = problem.execute_normal_place(
                    book_name, bin_name)
            elif ('careful_place' in action):
                action = action.split(' ')
                book_name = action[1]
                bin_name = action[2][:-1]
                success, next_state, reward = problem.execute_careful_place(
                    book_name, bin_name)
            elif ('normal_pick' in action):
                action = action.split(' ')
                book_name = action[1][:-1]
                success, next_state, reward = problem.execute_normal_pick(
                    book_name)
            elif ('careful_pick' in action):
                action = action.split(' ')
                book_name = action[1][:-1]
                success, next_state, reward = problem.execute_careful_pick(
                    book_name)
            else:
                print "specified action is not in action list"

            if next_state in state_list:
                next_state_index = state_list.index(next_state)
            else:
                state_list.append(next_state)
                next_state_index = state_list.index(next_state)
                # append a copy; appending zero_list itself would make every new
                # state share one row of Q-values
                q_table.append(list(zero_list))

            old_value = q_table[state_index][action_index]
            next_max = np.amax(q_table[next_state_index])
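            # Q-learning update:
            #   Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a'))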
            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            #update the q_table
            q_table[state_index][action_index] = new_value
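            # publish the (state, action, next_state, reward) transition and its updated
            # Q-value; pub is assumed to be a publisher created elsewhere in this script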
            str_out += '"(' + state_list[state_index] + ',' + action_list[
                action_index] + ',' + state_list[next_state_index] + ',' + str(
                    reward) + ')'
            str_out += ':' + str(q_table[state_index][action_index]) + '"'
            pub.publish(str_out)

            # accumulate the discounted return (later steps are weighted by gamma**count)
            if (count == 0):
                cumulative_reward += reward
            else:
                cumulative_reward += (gamma**count) * reward

            count += 1
            current_location = next_state

            #print current_location
        print(outerloopcnt, count, cumulative_reward)
        outerloopcnt += 1
        problem.reset_world()
Example #4
def q_learning_task2(num_episodes):
    action_list = problem.get_all_actions()
    epsilon = 0.99
    alpha = 0.3
    gamma = 0.9
    number_of_actions = len(action_list)
    q_table = [[0 for j in range(number_of_actions)] for i in range(1)]
    zero_list = [0 for i in range(number_of_actions)]
    outerloopcnt = 0

    for i in range(num_episodes):
        current_location = problem.get_current_state()
        cumulative_reward = 0
        state_list = []
        state_list.append(current_location)
        count = 0
        if (outerloopcnt % 10 == 0):
            epsilon -= 0.1
        #Using Epsilon greedy approach
        # Run the episode till it reaches the terminal state
        while (problem.is_terminal_state() == 0):
            str_out = ""
            state_index = state_list.index(current_location)
            rand = random.random()
            if (rand < epsilon):
                #selecting the action randomly
                action = random.choice(action_list)
                action_index = action_list.index(action)
            else:
                #Taking the maximum valued action from the q_table for that state
                max_value = np.amax(q_table[state_index])
                action_index = q_table[state_index].index(max_value)
                action = action_list[action_index]

            #Executing the action corresponding to the action selected
            if ('normal_moveF' in action):
                success, next_state, reward = problem.execute_normal_moveF()
            elif ('normal_TurnCW' in action):
                success, next_state, reward = problem.execute_normal_TurnCW()
            elif ('normal_TurnCCW' in action):
                success, next_state, reward = problem.execute_normal_TurnCCW()
            elif ('careful_moveF' in action):
                success, next_state, reward = problem.execute_careful_moveF()
            elif ('careful_TurnCW' in action):
                success, next_state, reward = problem.execute_careful_TurnCW()
            elif ('careful_TurnCCW' in action):
                success, next_state, reward = problem.execute_careful_TurnCCW()
            elif ('normal_place' in action):
                action = action.split(' ')
                book_name = action[1]
                bin_name = action[2][:-1]
                success, next_state, reward = problem.execute_normal_place(
                    book_name, bin_name)
            elif ('careful_place' in action):
                action = action.split(' ')
                book_name = action[1]
                bin_name = action[2][:-1]
                success, next_state, reward = problem.execute_careful_place(
                    book_name, bin_name)
            elif ('normal_pick' in action):
                action = action.split(' ')
                book_name = action[1][:-1]
                success, next_state, reward = problem.execute_normal_pick(
                    book_name)
            elif ('careful_pick' in action):
                action = action.split(' ')
                book_name = action[1][:-1]
                success, next_state, reward = problem.execute_careful_pick(
                    book_name)
            else:
                print "specified action is not in action list"

            if next_state in state_list:
                next_state_index = state_list.index(next_state)
            else:
                state_list.append(next_state)
                next_state_index = state_list.index(next_state)
                # append a copy so each new state gets its own row of Q-values
                q_table.append(list(zero_list))

            old_value = q_table[state_index][action_index]
            next_max = np.amax(q_table[next_state_index])
            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            #update the q_table
            q_table[state_index][action_index] = new_value
            str_out += '"(' + state_list[state_index] + ',' + action_list[
                action_index] + ',' + state_list[next_state_index] + ',' + str(
                    reward) + ')'
            str_out += ':' + str(q_table[state_index][action_index]) + '"'
            pub.publish(str_out)

            # accumulate the discounted return (later steps are weighted by gamma**count)
            if (count == 0):
                cumulative_reward += reward
            else:
                cumulative_reward += (gamma**count) * reward

            count += 1
            current_location = next_state

            #print current_location
        print(outerloopcnt, count, cumulative_reward)
        outerloopcnt += 1
        problem.reset_world()
Example #5
    def execute_track_t1(self):
        q = QTable()
        q.table = OrderedDict()
        print("Executing trajectory.")
        try:
            for curr_state, action_reward in self.track.items():
                action, reward = action_reward
                action = action.replace('(', '').replace(')',
                                                         '').replace(',', '')
                reward = float(reward)

                if curr_state not in q.table:
                    q.table[curr_state] = [0] * self.total_actions

                # Q value for this state and action Q(s,a)
                q.old_value = float(q.table[curr_state][self.index[action]])

                # Execute selected action
                status, new_state, _ = take_step(action)

                # print status, new_state, reward
                q.count_actions += 1

                if reward > 0:
                    print(action, reward)

                # Get max Q from next state
                if new_state in q.table:
                    q.next_max = float(max(q.table[new_state]))

                new_value = (1 - q.alpha) * q.old_value + q.alpha * (
                    reward + q.gamma * q.next_max)
                q.table[curr_state][self.index[action]] = new_value

                # accumulate the discounted return; r_gamma tracks the running gamma^t factor
                if q.count_actions == 1:
                    q.reward += reward
                else:
                    q.r_gamma *= 0.9
                    q.reward += (q.r_gamma * reward)

            print("Reached terminal state")
        finally:
            message = ''
            for k, v in q.table.items():
                action_value = {}
                for i, item in enumerate(v):
                    action_value[self.actions[i]] = item

                q.table[k] = action_value

            for k, v in q.table.items():
                message += k + ' : '
                message += str(v)
                message += '-'

            message = message[:-1]
            print('publish message')
            self.publisher.publish(message)
            problem.reset_world()
            print('World Reset')
            q.reward = 0
            q.r_gamma = 1
Example #6
    def train_t3(self, max_episodes):
        q = QTable()
        get_load_locations(self.init_state)
        curr_state = self.init_state
        curr_state = sort_state(curr_state)
        print("Searching the purpose of life.")
        for epoch in range(0, 2 * max_episodes):
            try:
                if epoch % 2 == 0:
                    print("Executing actions along with Exlporation")
                else:
                    print("Executing actions with Learned Policy only")

                while not problem.is_terminal_state():
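                    # initialise an all-zero row of Q-values the first time a state is seen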
                    if curr_state not in q.table:
                        q.table[curr_state] = [0] * self.total_actions

                    if random.uniform(0, 1) < q.epsilon and epoch % 2 == 0:
                        # print("Exploring..")
                        action = self.actions[random.randint(0, 9)]
                    else:
                        m = max(q.table[curr_state])
                        indices = [
                            j for j, k in enumerate(q.table[curr_state])
                            if k == m
                        ]
                        # This keeps bot from always picking the 1st best
                        # action and randomly chooses from all best choices
                        idx = random.choice(indices)
                        action = self.actions[idx]

                    if ('place' in action and not is_bin_location(curr_state)) \
                            or ('pick' in action and not is_book_location(curr_state)):
                        # Choose best action from pruned actions
                        # a_rewards = q.table[curr_state][0:3]+q.table[curr_state][43:46]
                        # idx = a_rewards.index(max(a_rewards))
                        # action = pruned_actions[idx]
                        action = random.choice(pruned_actions)

                    # Q value for this state and action Q(s,a)
                    q.old_value = float(
                        q.table[curr_state][self.index[action]])

                    # Execute selected action
                    status, new_state, reward = take_step(action)

                    if reward > 0:
                        print(action, reward, status)

                    # keep new_state in the same canonical (sorted) form as curr_state
                    new_state = sort_state(new_state)
                    # Get max Q from next state
                    if new_state in q.table:
                        q.next_max = float(max(q.table[new_state]))

                    new_value = (1 - q.alpha) * q.old_value + q.alpha * (
                        reward + q.gamma * q.next_max)
                    q.table[curr_state][self.index[action]] = new_value

                    # accumulate the discounted return
                    if q.count_actions == 1:
                        q.reward += reward
                    else:
                        q.r_gamma *= 0.9
                        q.reward += (q.r_gamma * reward)

                    message = "(" + curr_state + "," + action + "," + new_state + "," + str(
                        reward) + ")" + " : " + str(new_value)
                    self.publisher.publish(message)
                    # print message

                    curr_state = new_state
                print("Total Reward: " + str(q.reward))
                print("Reached terminal state")
            finally:
                problem.reset_world()
                print('World Reset')
                q.reward = 0
                q.r_gamma = 1
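
Examples #5 and #6 rely on a small QTable helper object that is not shown on this page. The sketch below is only an assumption reconstructed from the attributes those examples access (table, alpha, gamma, epsilon, old_value, next_max, count_actions, reward, r_gamma); the real class and its default values may differ.

class QTable(object):
    """Hypothetical container for Q-learning bookkeeping (assumed, not the original)."""

    def __init__(self, alpha=0.3, gamma=0.9, epsilon=0.9):
        self.table = {}            # state -> list of Q-values, one entry per action
        self.alpha = alpha         # learning rate
        self.gamma = gamma         # discount factor
        self.epsilon = epsilon     # exploration rate for epsilon-greedy selection
        self.old_value = 0.0       # Q(s, a) before the current update
        self.next_max = 0.0        # max_a' Q(s', a') of the next state
        self.count_actions = 0     # number of actions taken in the current episode
        self.reward = 0.0          # discounted return accumulated over the episode
        self.r_gamma = 1.0         # running gamma^t factor for discounting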