def step(self, action):
    roll, pitch, yaw, thrust = action[0], action[1], action[2], action[3]

    # update hunter
    # dronesim.simcontrol([roll, pitch, yaw, thrust])
    dronesim.simrun(int(1e9 / self.fps),
                    [roll, pitch, yaw, thrust])  # convert the frame period from seconds to nanoseconds

    # update state
    self.state = self.get_state()

    # calculate reward and check if done
    reward = (self.previous_distance - self.distance) * 500
    # even if chasing at max speed, the reward will be 10 / 100 * 500 = 50
    self.previous_distance = self.distance
    done = False
    reason = None
    # whether each recent detection falls inside the camera view
    # (currently unused; see the commented-out block below)
    is_in_view = [
        (self.min_relative_x < coordinate[0] < self.max_relative_x
         and self.min_relative_y < coordinate[1] < self.max_relative_y)
        for coordinate in self.coordinate_queue
    ]
    '''
    if True not in is_in_view:
        done = True
        reward = 0
        self.episodes += 1
        self.iteration = 0
    '''
    if (self.distance > self.max_detect_distance
            or self.distance < self.min_detect_distance
            or self.iteration > 100000):
        done = True
        reward = 0
        self.episodes += 1
        if self.distance < self.min_detect_distance:
            reward = 200
            reason = 1
        if self.distance > self.max_detect_distance:
            reason = 2
        if self.iteration > 100000:
            reason = 3
        # reset the per-episode step counter only after the checks above use it
        self.iteration = 0

    # control parameter
    self.iteration += 1

    return self.state, reward, done, {
        'distance': self.distance,
        'reason': reason
    }
def step(self, action):
    roll, pitch, yaw, thrust = action[0], action[1], action[2], action[3]

    # every 50 steps, random-walk the target drone's control setpoints, clipped to [-1, 1]
    if self.iteration % 50 == 0:
        self.roll_target = min(
            max(self.roll_target + 0.33 * np.random.randn(), -1), 1)
        self.pitch_target = min(
            max(self.pitch_target + 0.33 * np.random.randn(), -1), 1)
        self.yaw_target = min(
            max(self.yaw_target + 0.33 * np.random.randn(), -1), 1)
        self.thrust_target = min(
            max(self.thrust_target + 0.33 * np.random.randn(), -1), 1)

    # update hunter
    # dronesim.simcontrol([roll, pitch, yaw, thrust], [self.roll_target, self.pitch_target, self.yaw_target, self.thrust_target])
    dronesim.simrun(int(1e9 / self.fps), [roll, pitch, yaw, thrust], [
        self.roll_target, self.pitch_target, self.yaw_target,
        self.thrust_target
    ])  # convert the frame period from seconds to nanoseconds

    #####################
    self.test_roll_hunter = roll
    self.test_pitch_hunter = pitch
    self.test_yaw_hunter = yaw
    self.test_thrust_hunter = thrust
    #####################

    # update state
    self.state = self.get_state()

    # calculate reward and check if done
    reward = (self.previous_distance - self.distance) * 1000
    # even if chasing at max speed, the reward will be 10 / 100 * 1000 = 100
    self.previous_distance = self.distance
    done = False
    # whether each recent detection falls inside the camera view; (-1, -1) means no detection
    is_in_view = [
        (self.min_relative_x < coordinate[0] < self.max_relative_x
         and self.min_relative_y < coordinate[1] < self.max_relative_y)
        if coordinate[0] != -1 else False
        for coordinate in self.coordinate_queue
    ]
    if True not in is_in_view:
        done = True
        reward = 0
        self.episodes += 1
        self.iteration = 0

    if self.coordinate_queue[-1][0] != -1:
        # penalize the target drifting away from the image center,
        # weighted more heavily at close range
        error_x, error_y = abs(self.coordinate_queue[-1][0] - 0.5), abs(
            self.coordinate_queue[-1][1] - 0.5)
        # reward = reward - 20*error_x - 10*error_y - 20*(error_x*error_y)  # so the largest possible punishment is 20
        reward = reward - 300 * np.linalg.norm(
            [error_x / self.distance, error_y / self.distance])
        # reward = -100 * np.linalg.norm([error_x/self.distance, error_y/self.distance, (1/self.distance - 1)/5])
    else:
        # target not detected in the latest frame
        reward = -100

    if (self.distance > self.max_detect_distance
            or self.distance < self.min_detect_distance
            or self.iteration > self.max_iteration):
        done = True
        reward = 0
        self.episodes += 1
        if self.distance < self.min_detect_distance:
            reward = 200
        if self.iteration > self.max_iteration:
            reward = 0
        # reset the per-episode step counter only after the checks above use it
        self.iteration = 0

    if self.flag:
        done = True
        reward = 0
        self.iteration = 0

    # control parameter
    self.iteration += 1

    return self.state, reward, done, {'distance': self.distance}
def step(self, action):
    roll, pitch, yaw, thrust = action[0], action[1], action[2], action[3]
    # print('norm: ', roll, pitch, yaw)
    # _, ori_hunter, _, pos_target, ori_target, _, _ = dronesim.siminfo()
    # roll_target, pitch_target, yaw_target = ori_target[0], ori_target[1], ori_target[2]
    # print('target: ', roll_target, pitch_target, yaw_target)
    # print('hunter: ', ori_hunter[0], ori_hunter[1], ori_hunter[2])
    # print('diff: ', ori_hunter[0] - roll_target, ori_hunter[1] - pitch_target, ori_hunter[2] - yaw_target)

    # every 50 steps, random-walk the target drone's control setpoints, clipped to [-1, 1]
    if self.iteration % 50 == 0:
        self.roll_target = min(
            max(self.roll_target + 0.33 * np.random.randn(), -1), 1)
        self.pitch_target = min(
            max(self.pitch_target + 0.33 * np.random.randn(), -1), 1)
        self.yaw_target = min(
            max(self.yaw_target + 0.33 * np.random.randn(), -1), 1)
        self.thrust_target = min(
            max(self.thrust_target + 0.33 * np.random.randn(), -1), 1)
    # print('norm_target: ', self.roll_target, self.pitch_target, self.yaw_target)

    # update hunter
    # dronesim.simcontrol([roll, pitch, yaw, thrust], [self.roll_target, self.pitch_target, self.yaw_target, self.thrust_target])
    dronesim.simrun(int(1e9 / self.fps), [roll, pitch, yaw, thrust], [
        self.roll_target, self.pitch_target, self.yaw_target,
        self.thrust_target
    ])  # convert the frame period from seconds to nanoseconds

    # update state
    self.state = self.get_state()

    # calculate reward and check if done
    reward = (self.previous_distance - self.distance) * 1000
    # even if chasing at max speed, the reward will be 10 / 100 * 1000 = 100
    self.previous_distance = self.distance
    # print('reward: ', reward)
    done = False
    reason = None
    # whether each recent detection falls inside the camera view; (-1, -1) means no detection
    is_in_view = [
        (self.min_relative_x < coordinate[0] < self.max_relative_x
         and self.min_relative_y < coordinate[1] < self.max_relative_y)
        if coordinate[0] != -1 else False
        for coordinate in self.coordinate_queue
    ]
    '''
    if True not in is_in_view:
        done = True
        reward = 0
        self.episodes += 1
        self.iteration = 0
    '''
    # print('reward of first term: ', reward)
    # print(self.coordinate_queue[-1])
    if self.coordinate_queue[-1][0] != -1:
        # penalize the target drifting away from the image center,
        # weighted more heavily at close range
        error_x, error_y = abs(self.coordinate_queue[-1][0] - 0.5), abs(
            self.coordinate_queue[-1][1] - 0.5)
        # reward = reward - 20*error_x - 10*error_y - 20*(error_x*error_y)  # so the largest possible punishment is 20
        reward = reward - 200 * np.linalg.norm(
            [error_x / self.distance, error_y / self.distance])
    else:
        # target not detected in the latest frame
        reward = -100

    if (self.distance > self.max_detect_distance * 2
            or self.distance < self.min_detect_distance
            or self.iteration > 100000):
        done = True
        reward = 0
        self.episodes += 1
        if self.distance < self.min_detect_distance:
            reward = 200
            reason = 1
        if self.distance > self.max_detect_distance:
            reason = 2
        if self.iteration > 100000:
            reason = 3
        # reset the per-episode step counter only after the checks above use it
        self.iteration = 0

    # control parameter
    self.iteration += 1
    # print('reward: ', reward)

    return self.state, reward, done, {
        'distance': self.distance,
        'reason': reason
    }
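# Minimal usage sketch (not from the original file): it assumes step() above belongs
# to a Gym-style environment class with a reset() method returning the initial state.
# The class name DroneChaseEnv and the random policy are illustrative assumptions only.
if __name__ == '__main__':
    import numpy as np

    env = DroneChaseEnv()  # hypothetical wrapper class around dronesim
    state = env.reset()
    episode_reward = 0.0
    for _ in range(1000):
        # random roll/pitch/yaw/thrust commands in [-1, 1], matching step()'s action layout
        action = np.random.uniform(-1, 1, size=4)
        state, reward, done, info = env.step(action)
        episode_reward += reward
        if done:
            # 'reason' is only present in the variants of step() that report it
            print('episode done, reason:', info.get('reason'),
                  'distance:', info['distance'],
                  'return:', episode_reward)
            break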