def get_reward(self):
    """Return one shaped reward per goal in self.goal_set."""
    # On collision, punish every goal equally and clear the flag.
    if self.in_collision:
        self.in_collision = False
        return [self.obstacle_punishment] * len(self.goal_set)

    reward_list = []
    current_loc = Position(x=self.x, y=self.y, yaw=self.radian)
    for counter, goal in enumerate(self.goal_set):
        reward = 0
        d_location = current_loc - goal
        d_angle = math.fabs(goal.yaw - current_loc.yaw)
        if counter == self.active_goal:
            rospy.loginfo("Location Diff: %f", d_location)

        # Shaped reward based on distance to this goal.
        if self.goal_xy_threshold < d_location <= 1.0:
            reward += 2
        elif 1.0 < d_location <= 1.5:
            reward += 1
        elif 1.5 < d_location <= 2.0:
            reward += 0.5
        elif 2.0 < d_location <= 3.0:
            reward += -0.2
        elif d_location > 3.0:
            reward += -1.0

        # Fixed bonus/penalty depending on whether the goal region is reached.
        if d_location <= self.goal_xy_threshold:
            reward += self.location_fix_reward
        else:
            reward += self.fix_punishment

        # Inside the goal region, additionally require angular precision.
        if d_location <= self.goal_xy_threshold and d_angle <= self.goal_theta_threshold:
            reward = self.angular_fix_reward
            rospy.logdebug("Reached a Goal")
            if counter == self.active_goal:
                rospy.logwarn("Reached Current Goal.")

        reward_list.append(reward)
    return reward_list
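# --- Illustration only (not part of the original class) ----------------------
# A minimal sketch of the distance-tiered shaping used in get_reward() above.
# The default goal_xy_threshold of 0.3 m is a hypothetical value chosen only
# for this example; the fixed location/angular bonuses are added separately
# in the method itself.
def _shaped_distance_reward_example(d_location, goal_xy_threshold=0.3):
    if goal_xy_threshold < d_location <= 1.0:
        return 2.0
    elif 1.0 < d_location <= 1.5:
        return 1.0
    elif 1.5 < d_location <= 2.0:
        return 0.5
    elif 2.0 < d_location <= 3.0:
        return -0.2
    elif d_location > 3.0:
        return -1.0
    return 0.0  # within the goal region; handled by the fixed rewards instead
# Example: _shaped_distance_reward_example(1.2) -> 1.0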
def __init__(self, base_path, reference_frame, x_init, y_init, goals,
             ordered_list_index=[(0, 0, 0)], g_l_for_parent=[],
             full_info=[], **kwargs):
    '''
    Note: the multi-goal maze x/y indices are rotated, so each goal's
    x_ind and y_ind are swapped here to compensate.
    '''
    self.running = False     # Block callbacks from running
    self.mg_running = False
    super(MultiGoalStage, self).__init__(base_path, reference_frame,
                                         x_init, y_init, g_l_for_parent,
                                         ordered_list_index, **kwargs)
    self.nfq_args = {'epsilon': (0.1, 0.1),
                     'discount_factor': 0.99,
                     'random_decay': 0.995,
                     'explore_strategy': 1,
                     'activation_function': 1,
                     'learning_rate': 0.1,
                     'temperature': 0.20,
                     'max_replay_size': 50000}
    self._prepare_RL_variables()

    self.goals = goals
    self.goal_set = []
    self.full_info = full_info
    for x_g, y_g, x_ind, y_ind in self.goals:
        # Swap x_ind <-> y_ind to compensate for the rotated maze indices.
        self.goal_set.append(
            Position(x=x_g, y=y_g, yaw=0, x_ind=y_ind, y_ind=x_ind))
    self.reached_goals = [0] * len(self.goal_set)

    # Select below if goal and current state are both part of the input:
    # ev_size = self.structure.size * 2
    # Select below if only the current state is the input.
    # Cafe room: 47 x 24 = 1128; kitchen: 154 or 484.
    self.x_len = 47
    self.y_len = 24
    ev_size = 1128
    self.empty_vector = numpy.zeros((1, ev_size), dtype=theano.config.floatX)
    self.empty_vector = numpy.reshape(self.empty_vector, -1)

    # Which goal to follow; default is the goal with the highest TD error.
    self.active_goal = 0
    self.base_path = os.path.join(self.lowest_path,
                                  'goal_' + str(self.active_goal) + '/')
    # Which init position to start from.
    self.init_index = 0
    self.init_no = len(self.full_info[0][1])
    self.RL.select_action(self.empty_vector,
                          self.active_goal)  # RL needs to be initialized first
    # self.active_goal = self.select_active_goal()
    rospy.loginfo("First Active Goal is: %d", self.active_goal)

    self.steps_number = 1
    self.trial_max_steps = 2000

    # Used for plotting.
    self.n_steps_list = []
    self.cum_reward_list = []
    self.consecutive_succes = list(np.zeros(15))
    self.actions_numbers = list(np.zeros(200))
    self.fig1_data = []
    self.fig2_data = []

    self.last_xp_replay_err = float('Inf')
    self.failed_action_queue = []

    if kwargs:
        self.set_parameters(**kwargs)

    self.running = True      # Allow callbacks to run
    self.mg_running = True
    self.save_timer = time.time()
    self.save_interval = 3600.0  # seconds
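# --- Illustration only (not part of the original class) ----------------------
# A minimal sketch of the index compensation described in the constructor's
# docstring above: the maze grid indices arrive rotated, so each goal's x_ind
# and y_ind are swapped when the Position objects are built.  The goal tuple
# used in the example is hypothetical.
def _swap_goal_indices_example(goals):
    """Return (x_ind, y_ind) pairs as they end up stored in goal_set."""
    return [(y_ind, x_ind) for (_x_g, _y_g, x_ind, y_ind) in goals]
# Example: _swap_goal_indices_example([(1.5, 2.0, 10, 4)]) -> [(4, 10)]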
def __init__(self, base_path, reference_frame, x_init, y_init, goals,
             ordered_list_index=[(0, 0, 0)], g_l_for_parent=[],
             full_info=[], **kwargs):
    self.running = False     # Block callbacks from running
    self.mg_running = False
    super(MultiGoalStage, self).__init__(base_path, reference_frame,
                                         x_init, y_init, g_l_for_parent,
                                         ordered_list_index, **kwargs)
    self.nfq_args = {'epsilon': (0.1, 0.1),
                     'discount_factor': 0.9,
                     'random_decay': 0.995,
                     'explore_strategy': 3,
                     'activation_function': 1,
                     'learning_rate': 0.3}
    self._prepare_RL_variables()

    self.goals = goals
    self.goal_set = []
    self.full_info = full_info
    for x_g, y_g, x_ind, y_ind in self.goals:
        self.goal_set.append(
            Position(x=x_g, y=y_g, yaw=0, x_ind=x_ind, y_ind=y_ind))
    self.reached_goals = [0] * len(self.goal_set)

    # Select below if goal and current state are both part of the input:
    # ev_size = self.structure.size * 2
    # Select below if only the current state is the input.
    ev_size = 484
    self.empty_vector = numpy.zeros((1, ev_size), dtype=theano.config.floatX)
    self.empty_vector = numpy.reshape(self.empty_vector, -1)

    # Which goal to follow; default is the goal with the highest TD error.
    self.active_goal = 2
    self.base_path = os.path.join(self.lowest_path,
                                  'goal_' + str(self.active_goal) + '/')
    # Which init position to start from.
    self.init_index = 0
    self.init_no = len(self.full_info[0][1])
    self.RL.select_action(self.empty_vector,
                          self.active_goal)  # RL needs to be initialized first
    # self.active_goal = self.select_active_goal()
    rospy.loginfo("First Active Goal is: %d", self.active_goal)

    self.steps_number = 1
    self.trial_max_steps = 200

    # Used for plotting.
    self.n_steps_list = []
    self.cum_reward_list = []
    self.consecutive_succes = list(np.zeros(15))
    self.actions_numbers = list(np.zeros(200))
    self.fig1_data = []
    self.fig2_data = []

    if kwargs:
        self.set_parameters(**kwargs)

    self.running = True      # Allow callbacks to run
    self.mg_running = True