Example #1
    def get_reward(self):
        reward_list = []

        # A collision overrides everything: clear the flag and punish every goal equally.
        if self.in_collision:
            self.in_collision = False
            return [self.obstacle_punishment] * len(self.goal_set)

        counter = 0  # index of the goal being scored; used to single out the active goal
        for goal in self.goal_set:

            reward = 0
            current_loc = Position(x=self.x, y=self.y, yaw=self.radian)

            # Distance and heading error to this goal
            d_location = current_loc - goal
            d_angle = math.fabs(goal.yaw - current_loc.yaw)

            if counter == self.active_goal:
                rospy.loginfo("Location Diff: %f", d_location)

            # Give reward based on distance band to the goal
            if self.goal_xy_threshold < d_location <= 1.0:
                reward += 2
            elif 1.0 < d_location <= 1.5:
                reward += 1
            elif 1.5 < d_location <= 2.0:
                reward += 0.5
            elif 2.0 < d_location <= 3.0:
                reward += -0.2
            elif d_location > 3.0:
                reward += -1.0

            # Fixed bonus inside the position tolerance, small penalty otherwise
            if d_location <= self.goal_xy_threshold:
                reward += self.location_fix_reward
            else:
                reward += self.fix_punishment

            # If the robot is within the position tolerance, also check angular precision
            if d_location <= self.goal_xy_threshold:
                if d_angle <= self.goal_theta_threshold:
                    reward = self.angular_fix_reward
                    rospy.logdebug("Reached a Goal")
                    if counter == self.active_goal:
                        rospy.logwarn("Reached Current Goal.")

            reward_list.append(reward)
            counter += 1

        return reward_list
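
The distance-banded shaping in get_reward() can be exercised on its own. Below is a minimal, self-contained sketch of the same banding logic; every constant is a hypothetical stand-in for the corresponding class attribute (goal_xy_threshold, goal_theta_threshold, location_fix_reward, angular_fix_reward, fix_punishment), not a value from the original project.

# Sketch of the banded reward in get_reward(); all constants are assumed.
GOAL_XY_THRESHOLD = 0.3      # metres (hypothetical)
GOAL_THETA_THRESHOLD = 0.2   # radians (hypothetical)
LOCATION_FIX_REWARD = 5.0
ANGULAR_FIX_REWARD = 10.0
FIX_PUNISHMENT = -0.5

def shaped_reward(d_location, d_angle):
    """Reward grows as the robot closes in on a goal pose."""
    reward = 0.0
    # Coarse shaping: nearer distance bands earn larger rewards.
    if GOAL_XY_THRESHOLD < d_location <= 1.0:
        reward += 2.0
    elif 1.0 < d_location <= 1.5:
        reward += 1.0
    elif 1.5 < d_location <= 2.0:
        reward += 0.5
    elif 2.0 < d_location <= 3.0:
        reward += -0.2
    elif d_location > 3.0:
        reward += -1.0
    # Inside the position tolerance: fixed bonus, otherwise a small penalty.
    if d_location <= GOAL_XY_THRESHOLD:
        reward += LOCATION_FIX_REWARD
        # The goal only counts as reached when the heading is aligned too.
        if d_angle <= GOAL_THETA_THRESHOLD:
            reward = ANGULAR_FIX_REWARD
    else:
        reward += FIX_PUNISHMENT
    return reward

print(shaped_reward(0.2, 0.1))  # within both tolerances -> 10.0
print(shaped_reward(2.5, 1.0))  # far from the goal -> -0.7
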
Example #2
    def __init__(self,
                 base_path,
                 reference_frame,
                 x_init,
                 y_init,
                 goals,
                 ordered_list_index=[(0, 0, 0)],
                 g_l_for_parent=[],
                 full_info=[],
                 **kwargs):
        '''
        Note: the x, y grid indices of the multi-goal maze arrive rotated,
        so x_ind and y_ind are swapped when the goal Positions are built below.
        '''

        self.running = False  # Block callbacks from running during setup
        self.mg_running = False
        super(MultiGoalStage,
              self).__init__(base_path, reference_frame, x_init, y_init,
                             g_l_for_parent, ordered_list_index, **kwargs)
        self.nfq_args = {
            'epsilon': (0.1, 0.1), 'discount_factor': 0.99,
            'random_decay': 0.995, 'explore_strategy': 1,
            'activation_function': 1, 'learning_rate': 0.1,
            'temperature': 0.20, 'max_replay_size': 50000
        }
        self._prepare_RL_variables()

        self.goals = goals
        self.goal_set = []
        self.full_info = full_info
        for x_g, y_g, x_ind, y_ind in self.goals:
            # x_ind and y_ind are swapped to compensate for the rotated maze
            # indices (see the docstring above).
            self.goal_set.append(
                Position(x=x_g, y=y_g, yaw=0, x_ind=y_ind, y_ind=x_ind))
        self.reached_goals = [0] * len(self.goal_set)

        # Use ev_size = self.structure.size * 2 if both the goal and the
        # current state are part of the input. The fixed sizes below assume
        # only the current state is the input
        # (cafe room = 1128?, kitchen = 154 or 484).
        self.x_len = 47
        self.y_len = 24  # 47 / 24 for the cafe room
        ev_size = 1128  # 154 for the kitchen option
        self.empty_vector = numpy.zeros((1, ev_size),
                                        dtype=theano.config.floatX)
        self.empty_vector = numpy.reshape(self.empty_vector, -1)

        # Which goal to follow; by default, the goal with the highest TD.
        self.active_goal = 0
        self.base_path = os.path.join(self.lowest_path,
                                      'goal_' + str(self.active_goal) + '/')
        # Which initial position to start from
        self.init_index = 0
        self.init_no = len(self.full_info[0][1])
        self.RL.select_action(
            self.empty_vector,
            self.active_goal)  # RL needs to be initialized first
        # self.active_goal = self.select_active_goal()

        rospy.loginfo("First Active Goal is: %d", self.active_goal)

        self.steps_number = 1
        self.trial_max_steps = 2000

        # Used for plotting
        self.n_steps_list = []
        self.actions_numbers = []
        self.cum_reward_list = []
        self.consecutive_succes = list(np.zeros(15))
        self.actions_numbers = list(np.zeros(200))
        self.fig1_data = []
        self.fig2_data = []

        self.last_xp_replay_err = float('Inf')
        self.failed_action_queue = []

        if kwargs:
            self.set_parameters(**kwargs)

        self.running = True  # Allow callbacks to run
        self.mg_running = True
        self.save_timer = time.time()
        self.save_interval = 3600.0  # seconds
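
The index swap called out in the constructor's docstring (the maze x, y grid indices arrive rotated) can be shown in isolation. The sketch below assumes the (x_g, y_g, x_ind, y_ind) tuple layout used in the loop over self.goals; the namedtuple is a hypothetical stand-in for the project's Position class and the goal values are made up.

# Sketch of the rotated-index compensation: x_ind and y_ind are swapped
# when each goal Position is built. Position here is a hypothetical
# stand-in for the project's own class.
from collections import namedtuple

Position = namedtuple('Position', ['x', 'y', 'yaw', 'x_ind', 'y_ind'])

goals = [(1.5, -2.0, 3, 7), (0.5, 4.0, 10, 2)]  # (x_g, y_g, x_ind, y_ind), made-up values

goal_set = [
    Position(x=x_g, y=y_g, yaw=0, x_ind=y_ind, y_ind=x_ind)  # note the swap
    for x_g, y_g, x_ind, y_ind in goals
]

print(goal_set[0])  # Position(x=1.5, y=-2.0, yaw=0, x_ind=7, y_ind=3)
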
Example #3
    def __init__(self,
                 base_path,
                 reference_frame,
                 x_init,
                 y_init,
                 goals,
                 ordered_list_index=[(0, 0, 0)],
                 g_l_for_parent=[],
                 full_info=[],
                 **kwargs):

        self.running = False  # Block callbacks from running during setup
        self.mg_running = False
        super(MultiGoalStage,
              self).__init__(base_path, reference_frame, x_init, y_init,
                             g_l_for_parent, ordered_list_index, **kwargs)
        self.nfq_args = {
            'epsilon': (0.1, 0.1), 'discount_factor': 0.9,
            'random_decay': 0.995, 'explore_strategy': 3,
            'activation_function': 1, 'learning_rate': 0.3
        }
        self._prepare_RL_variables()

        self.goals = goals
        self.goal_set = []
        self.full_info = full_info
        for x_g, y_g, x_ind, y_ind in self.goals:
            self.goal_set.append(
                Position(x=x_g, y=y_g, yaw=0, x_ind=x_ind, y_ind=y_ind))
        self.reached_goals = [0] * len(self.goal_set)

        # Use ev_size = self.structure.size * 2 if both the goal and the
        # current state are part of the input; the fixed size below assumes
        # only the current state is the input.
        ev_size = 484
        self.empty_vector = numpy.zeros((1, ev_size),
                                        dtype=theano.config.floatX)
        self.empty_vector = numpy.reshape(self.empty_vector, -1)

        # Which goal to follow; by default, the goal with the highest TD.
        self.active_goal = 2
        self.base_path = os.path.join(self.lowest_path,
                                      'goal_' + str(self.active_goal) + '/')
        # Which initial position to start from
        self.init_index = 0
        self.init_no = len(self.full_info[0][1])
        self.RL.select_action(
            self.empty_vector,
            self.active_goal)  # RL needs to be initialized first
        # self.active_goal = self.select_active_goal()

        rospy.loginfo("First Active Goal is: %d", self.active_goal)

        self.steps_number = 1
        self.trial_max_steps = 200

        # Used for plotting
        self.n_steps_list = []
        self.actions_numbers = []
        self.cum_reward_list = []
        self.consecutive_succes = list(np.zeros(15))
        self.actions_numbers = list(np.zeros(200))
        self.fig1_data = []
        self.fig2_data = []

        if kwargs:
            self.set_parameters(**kwargs)

        self.running = True  # Allow callbacks to run
        self.mg_running = True