Esempio n. 1
0
    def __init__(self, all_states, all_actions, initial_location, loc_dict,
                 num_columns, num_rows, transition_dict, init_task_status,
                 eq_eps, rob_id, reward_dict):
        self.location = initial_location
        self.all_actions = all_actions  # Actions include: { Stay:1, North:2, South:3, East:4, West:5}
        self.location_dictionary = copy.deepcopy(loc_dict)
        self.num_columns = num_columns
        self.num_rows = num_rows
        self.transition_dict = copy.deepcopy(transition_dict)
        self.ZERO_EPSILON = eq_eps
        self.rob_id = rob_id
        self.reward_dict = copy.deepcopy(reward_dict)

        init_loc_key = helper.get_key_from_dict_by_value(
            initial_location, loc_dict)
        self.my_state = helper.get_key_from_dict_by_value(
            [init_loc_key, init_task_status], all_states)

        self.values = []
        for _ in all_states:
            self.values.append(0.0)

        act_init_val = []
        for _ in self.all_actions:
            act_init_val.append(2.5)  # mean rewards

        self.q_table = []
        for _ in all_states:
            self.q_table.append(act_init_val)

        self.rob_state_mapping = {}
Esempio n. 2
0
 def set_initial_state(self, init_task_status, env_states_dict):
     #  helper.get_key_from_dict_by_value([init_loc_key, init_task_status], all_states)
     location_key = helper.get_key_from_dict_by_value(
         self.location, self.location_dictionary)
     state = [location_key, init_task_status]
     # print(" $$$$$$$$$$$ state: ", state)
     # print(" env_states_dict: ", env_states_dict)
     state_key_general = helper.get_key_from_dict_by_value(
         state, env_states_dict)
     # print("  state_key_general : ", state_key_general)
     state_key_spesific = self.rob_state_mapping.get(state_key_general)
     # print(" \n %%%$$$$%%%$$   state_key_spesific:  ", state_key_spesific)
     self.my_state = state_key_spesific
Esempio n. 3
0
    def set_task_status(self, tsk_key, all_states_dict):
        all_st = copy.deepcopy(all_states_dict)
        old_state_key = self.my_state
        old_state = all_st.get(old_state_key)

        old_state[1] = tsk_key
        self.my_state = helper.get_key_from_dict_by_value(old_state, all_st)
Esempio n. 4
0
    def modify_rob_state_by_tsk_change(self, tsk_sts_key, env_states_dict):
        """
        changing robot's state when task status changes but robot's position is fixed.
        :param tsk_sts_key:
        :param all_states_dict:
        :return:
        """
        ksp_states_dict = copy.deepcopy(self.kSP_states_dict)
        # print(" \n ********* In modify_rob_state_by_tsk_change:  *********** ")
        # print(" tsk_sts_key: ", tsk_sts_key)
        # print(" env_states_dict:  ", env_states_dict)
        # print("  self.rob_state_mapping:  ", self.rob_state_mapping)
        # print(" self.my_state:  ", self.my_state)
        env_all_st = copy.deepcopy(env_states_dict)
        current_state_key = self.my_state
        current_state_list = ksp_states_dict.get(current_state_key)
        # print(" self.rob_state_mapping.get(current_state_key)", self.rob_state_mapping.get(current_state_key))
        # print(" current_state_list:  ", current_state_list)
        current_pos_key = current_state_list[0]
        new_state_list = [current_pos_key, tsk_sts_key]

        general_st = helper.get_key_from_dict_by_value(new_state_list,
                                                       env_all_st)
        # print(" \n self.rob_state_mapping.get(general_st):  ", self.rob_state_mapping.get(general_st))
        self.my_state = self.rob_state_mapping.get(general_st)
Esempio n. 5
0
    def set_my_state(self, state, env_all_states_dict):
        env_all_st = copy.deepcopy(env_all_states_dict)
        # self.my_state = helper.get_key_from_dict_by_value(state, all_st)
        general_st = helper.get_key_from_dict_by_value(state, env_all_st)
        self.my_state = self.rob_state_mapping.get(general_st)
        # print(" in set_my_state,   self.my_state:   ", self.my_state)

        all_locations_dict = copy.deepcopy(self.location_dictionary)
        self.location = all_locations_dict.get(state[0])
Esempio n. 6
0
    def set_task_status(self, tsk_key, env_states_dict):
        ksp_states_dict = copy.deepcopy(self.kSP_states_dict)
        env_all_st = copy.deepcopy(env_states_dict)
        old_state_key = self.my_state
        old_state = ksp_states_dict.get(old_state_key)
        # print(" old_state:  ", old_state)

        old_state[1] = tsk_key

        general_st = helper.get_key_from_dict_by_value(old_state, env_all_st)
        self.my_state = self.rob_state_mapping.get(general_st)
Esempio n. 7
0
    def make_mapping_from_global_tasks(self, env_all_states):
        # print(" self.kSP_states_dict:  ", self.kSP_states_dict)
        ksp_states_dict = copy.deepcopy(self.kSP_states_dict)
        rob_state_mapping = {}
        for st in env_all_states:
            rob_loc = env_all_states.get(st)[0]
            tsk = env_all_states.get(st)[1]
            # print("\n st: ", st, " rob_loc:  ", rob_loc, "tsk: ", tsk)
            num_active_tasks = self.compute_num_active_tsks(tsk)
            # print("num_active_tasks:  ", num_active_tasks)
            if num_active_tasks <= self.k:
                ksp_state_key = helper.get_key_from_dict_by_value(
                    [rob_loc, tsk], ksp_states_dict)
                # print(" @@@@@@ [rob_loc, tsk]:  ", [rob_loc, tsk], "   ksp_state_key: ", ksp_state_key)
                rob_state_mapping[st] = ksp_state_key
            else:
                dist_list_tsks = self.compute_task_dist_to_rob_loc(
                    rob_loc, tsk)
                # print("   dist_list_tsks:  ", dist_list_tsks)
                k_nearest_tsks = self.compute_rob_k_nearest_tasks(
                    self.k, dist_list_tsks)
                # print("   k_nearest_tsks:  ", k_nearest_tsks)
                k_nearest_tsks.reverse()
                tsk_str = ''.join(k_nearest_tsks)
                tsk_decimal = int(tsk_str, 2)
                # print(" tsk_decimal: ", tsk_decimal)
                # tsk_mapping[tsk] = tsk_decimal
                new_state = [rob_loc, tsk_decimal]
                ksp_state_key = helper.get_key_from_dict_by_value(
                    new_state, ksp_states_dict)
                # print(" !!!!!! tsk: ", tsk, "  new_state:  ", new_state, "   ksp_state_key: ", ksp_state_key)
                rob_state_mapping[
                    st] = ksp_state_key  # helper.get_key_from_dict_by_value(new_state, env_all_states)

        self.rob_state_mapping = copy.deepcopy(rob_state_mapping)

        file_name = "output-data/tsk_mapping" + str(self.rob_id) + ".txt"
        f = open(file_name, "a+")
        f.write(str(rob_state_mapping) + "\n\n")
        f.close()
Esempio n. 8
0
 def modify_rob_state_by_tsk_change(self, tsk_sts_key, all_states_dict):
     """
     changing robot's state when task status changes but robot's position is fixed.
     :param tsk_sts_key:
     :param all_states_dict:
     :return:
     """
     all_st = copy.deepcopy(all_states_dict)
     current_state_key = self.my_state
     current_state_list = all_st[current_state_key]
     current_pos_key = current_state_list[0]
     new_state_list = [current_pos_key, tsk_sts_key]
     self.my_state = helper.get_key_from_dict_by_value(
         new_state_list, all_st)
Esempio n. 9
0
    def generate_new_tasks(self, task_status_key, file_id, iteration,
                           task_generation_rate):

        rob_location_keies = []
        for j in range(len(self.all_robs_loc)):
            # print(" j:  ", j, " ,  self.all_robs_loc[j]:", self.all_robs_loc[j])
            rob_location_keies.append(
                helper.get_key_from_dict_by_value(self.all_robs_loc[j],
                                                  self.rob_loc_dict))
        # print(" **********  rob_location_keies:  ", rob_location_keies)

        rob_task_related_indices = []
        for k in range(len(rob_location_keies)):
            rob_task_related_indices.append(self.num_rows * self.num_columns -
                                            1 - rob_location_keies[k])
            #  = self.num_rows * self.num_columns - 1 - rob_loc

        # print("rob_task_related_indices:  ", rob_task_related_indices)

        is_dirt = np.binary_repr(task_status_key,
                                 width=self.num_columns * self.num_rows)
        new_task_generated = False
        new_is_dirt = []
        for i in range(len(is_dirt)):
            if is_dirt[i] == '0':
                rand_num = np.random.rand()
                # print("  *********  i:  ", i)
                # print("helper.is_included_in_list(rob_task_related_indices: ", helper.is_included_in_list(rob_task_related_indices, i))
                if rand_num <= task_generation_rate and not helper.is_included_in_list(
                        rob_task_related_indices, i):
                    new_is_dirt.append('1')
                    # print(" $#@#$$#@  rand_num: ", rand_num, "\n")
                    new_task_generated = True
                else:
                    new_is_dirt.append('0')
            else:  # element == '1':
                new_is_dirt.append('1')

        str_new_dirt = ''.join(new_is_dirt)
        new_dirt_key = int(str_new_dirt, 2)
        # if new_task_generated:
        #     file_name = "output-data/newTask" + file_id + ".txt"
        #     fnewtsk = open(file_name, "a+")
        #     fnewtsk.write(" iteration:  " + str(iteration) + "\t \t")
        #     fnewtsk.write(str(new_dirt_key) + "\n")
        #     fnewtsk.close()
        return new_dirt_key
Esempio n. 10
0
    def compute_presence_mass(self, loc_dict, all_st_dict, all_rob_loc_list,
                              tsk_key, horizon, all_rob_ids, file_id):

        others_loc = []
        for counter in range(len(all_rob_loc_list)):
            if not all_rob_ids[counter] == self.rob_id:
                others_loc.append(all_rob_loc_list[counter])

        all_robot_loc_dict = copy.deepcopy(loc_dict)
        all_loc_keys = list(all_robot_loc_dict.keys())
        all_loc_keys.sort()

        others_loc_key = []
        for location in others_loc:
            others_loc_key.append(
                helper.get_key_from_dict_by_value(location,
                                                  all_robot_loc_dict))

        others_st_keys = []  # [other_loc_key, tsk_key]
        for loc_key in others_loc_key:
            others_st_keys.append(
                helper.get_key_from_dict_by_value([loc_key, tsk_key],
                                                  all_st_dict))

        p0 = []
        for _ in others_loc:
            zero_prob = []
            for _ in all_st_dict:
                zero_prob.append(0.0)
            p0.append(zero_prob)

        for i in range(len(p0)):
            p0[i][others_st_keys[i]] = 1.0

        all_st_keys = list(all_st_dict.keys())
        all_st_keys.sort()

        p_mass = []  # [p0]
        for _ in range(len(others_loc)):
            p_mass.append([])

        for agent_counter in range(len(p_mass)):
            for h in range(horizon):
                probs = []
                for st_next in all_st_keys:
                    sum_probs = 0.0
                    for st in all_st_keys:
                        p0_st = p0[agent_counter][st]

                        policy_vec = self.make_policy_vec_by_q_values(st)

                        trans_vec = [
                            self.get_transition(st, action, st_next)
                            for action in self.all_actions
                        ]

                        dp = helper.dot_product(policy_vec, trans_vec)

                        sum_probs += dp * p0_st
                    probs.append(sum_probs)
                sum_probabilities = sum(probs)
                normalized_prob = [x / sum_probabilities for x in probs]
                p_mass[agent_counter].append(normalized_prob)
                p0[agent_counter] = copy.deepcopy(normalized_prob)
        print(" %%%%   p0:  ", p0)

        # computed p_mass is related to states that consist of task status and robot location. However for calculating
        # the best response, the probability of being at a particular location is important. So we sum to get just
        # robot position probability:
        # ************  Initializing
        pmass_loc = []
        for age in range(len(p_mass)):
            p_mass_loc_horizon = []
            for h in range(len(p_mass[0])):
                pmass_loc_init = []
                for i in range(len(all_loc_keys)):
                    pmass_loc_init.append(0.0)
                p_mass_loc_horizon.append(pmass_loc_init)
            pmass_loc.append(p_mass_loc_horizon)
        # ************  Summing
        for ag in range(len(p_mass)):
            for h in range(len(p_mass[0])):
                for key in all_st_keys:
                    pmass_loc[ag][h][all_st_dict.get(key)
                                     [0]] += p_mass[ag][h][key]

        # ************  Normalizing
        p_mass_normalized = []
        for agent_counter in range(len(pmass_loc)):
            p_mass_new = []
            for p_horizon in pmass_loc[agent_counter]:
                p_mass_new.append(helper.normalize_probability(p_horizon))
            p_mass_normalized.append(p_mass_new)

        return p_mass_normalized
Esempio n. 11
0
    def set_my_state(self, state, all_states_dict):
        all_st = copy.deepcopy(all_states_dict)
        self.my_state = helper.get_key_from_dict_by_value(state, all_st)

        all_locations_dict = copy.deepcopy(self.location_dictionary)
        self.location = all_locations_dict.get(state[0])
Esempio n. 12
0
    def execute_action(self, action, rob_loc, tsk_status_deci):
        # print("action, rob_loc, tsk_status_deci: ", action, rob_loc, tsk_status_deci)
        all_st = copy.deepcopy(self.all_states)

        all_robot_loc_dict = copy.deepcopy(self.rob_loc_dict)
        # for key4, val4 in all_robot_loc_dict.items():
        #     if val4 == rob_loc:
        #         loc_key = key4
        #         break
        loc_key = helper.get_key_from_dict_by_value(rob_loc,
                                                    all_robot_loc_dict)

        # for key8, val8 in all_st.items():
        #     if val8 == [loc_key, tsk_status_deci]:
        #         current_state_key = key8
        #         break

        # Using random simulation to find the next state:
        # s_probs = [self.get_transition(current_state_key, action,  sj) for sj in all_st.keys()]
        # state = all_st.get(helper.draw_arg(s_probs))
        # return state

        # We use this instead of simulation because we need to fix rate of generating new tasks in the system, even
        # when there are multiple robots in the environment
        if action == '1':  # stay in place
            # print(" ******** action == '1'  ******* ")
            tsk_status_bin_str = np.binary_repr(tsk_status_deci,
                                                width=self.num_columns *
                                                self.num_rows)
            # print(" FIRST tsk_status_bin_str:   ", tsk_status_bin_str)
            tsk_status_bin_list = []
            for elm in tsk_status_bin_str:
                tsk_status_bin_list.append(elm)
            rob_task_index = self.num_rows * self.num_columns - 1 - loc_key
            tsk_status_bin_list[rob_task_index] = '0'
            str_new_task_status = ''.join(tsk_status_bin_list)
            new_task_status = int(str_new_task_status, 2)
            # print(" new_task_status:   ", new_task_status)
            return [loc_key, new_task_status]
        else:
            rnd_number = np.random.randint(low=0, high=10)
            if rnd_number == 0:  # No motion just for random error :(
                # print("rnd_number:  0 ", rnd_number)
                return [loc_key, tsk_status_deci]
            else:
                current_rob_loc = copy.deepcopy(self.rob_loc_dict.get(loc_key))
                # print(" current_rob_loc:   ", current_rob_loc)
                if action == '2':  # North:2
                    if current_rob_loc[0] == self.num_rows - 1:
                        return [loc_key, tsk_status_deci]
                    else:  # if not current_rob_loc[0] == self.num_rows - 1
                        current_rob_loc[0] += 1
                elif action == '3':  # South:3
                    if current_rob_loc[0] == 0:
                        return [loc_key, tsk_status_deci]
                    else:  # if not current_rob_loc[0] == 0
                        current_rob_loc[0] -= 1
                elif action == '4':  # East:4
                    if current_rob_loc[1] == self.num_columns - 1:
                        return [loc_key, tsk_status_deci]
                    else:  # if not current_rob_loc[1] == self.num_columns - 1:
                        current_rob_loc[1] += 1
                elif action == '5':  # West:5
                    if current_rob_loc[1] == 0:
                        return [loc_key, tsk_status_deci]
                    else:  # if not current_rob_loc[1] == 0
                        current_rob_loc[1] -= 1

                for key5, val5 in self.rob_loc_dict.items():
                    if val5 == current_rob_loc:
                        new_loc_key = key5

                # print(" return  [new_loc_key, tsk_status_deci]",  [new_loc_key, tsk_status_deci])
                return [new_loc_key, tsk_status_deci]