Example #1
    def attitude_limit(self):
        """
        Limit the attitude (roll, pitch, yaw) to the configured bounds.
        :return: (att_overlimit, attitude_limited), where attitude_limited is a quaternion
        """
        euler = quat2euler(self.state[6:10])
        r, p, y = euler[0], euler[1], euler[2]
        r_lim = deg2rad(85)
        p_lim = deg2rad(85)
        y_lim = deg2rad(175)

        att_overlimit = bool(np.abs(r) >= r_lim or np.abs(p) >= p_lim
                             or np.abs(y) >= y_lim)
        if att_overlimit:
            # Clamp every axis that exceeds its bound and rebuild the quaternion.
            attitude_limited = euler2quat(
                np.array([np.clip(r, -r_lim, r_lim),
                          np.clip(p, -p_lim, p_lim),
                          np.clip(y, -y_lim, y_lim)]))
        else:
            # Within bounds: keep the current attitude quaternion unchanged.
            attitude_limited = self.state[6:10]
        return att_overlimit, attitude_limited
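A minimal usage sketch (not from the original source; the call site and the write-back into self.state are assumptions): the clamped quaternion would typically be written back into the state whenever the limit flag is raised.

        # Hypothetical call site inside the same environment class (assumed):
        att_overlimit, attitude_limited = self.attitude_limit()
        if att_overlimit:
            # Overwrite the stored attitude with the clamped quaternion.
            self.state[6:10] = attitude_limited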
Example #2
    def __init__(self):
        self.drone = Drone()

        self.state = np.zeros(13)

        self.steps_beyond_done = None

        self.pos_threshold = 1
        self.vel_threshold = 0.1

        # Initial state: position near [0, 0, 5] m with a random offset, a small
        # random attitude perturbation, and zero velocity and angular rate.
        ini_pos = np.array([0.0, 0.0, 5.0]) + np.random.uniform(-1, 1, (3, ))
        ini_att = euler2quat(
            np.array([deg2rad(0), deg2rad(0), 0]) +
            np.random.uniform(-0.2, 0.2, (3, )))
        ini_angular_rate = np.array([0, deg2rad(0), 0])
        self.ini_state = np.zeros(13)
        self.ini_state[0:3] = ini_pos
        self.ini_state[6:10] = ini_att
        self.ini_state[10:] = ini_angular_rate

        pos_des = np.array([0.0, 0.0, 5.0])  # [x, y, z]
        att_des = euler2quat(np.array([deg2rad(0), deg2rad(0), deg2rad(0)]))
        self.state_des = np.zeros(13)
        self.state_des[0:3] = pos_des
        self.state_des[6:10] = att_des

        low = self.drone.state_lim_low
        high = self.drone.state_lim_high

        self.action_space = spaces.Box(low=np.array([0.0, 0.0, 0.0, 0.0]),
                                       high=np.array([1.0, 1.0, 1.0, 1.0]))
        self.observation_space = spaces.Box(low=low, high=high)
        # Scale for the normalized action: each rotor command in [0, 1] is
        # multiplied by the vehicle weight m * g.
        self.action_max = np.array([1.0, 1.0, 1.0, 1.0]) * self.drone.mass * self.drone.gravity

        self.seed()
Example #3
    def __init__(self):

        self.chaser = Drone()
        self.target = Drone()
        self.target_controller = controller(self.target.get_arm_length(), self.target.get_mass())

        self.state_chaser = np.zeros(13)
        self.state_target = np.zeros(13)
        self.rel_state = np.zeros(12)
        self.t = 0

        self.done = False
        self.reward = 0.0
        self.shaping = 0.0
        self.last_shaping = 0.0

        # self.steps_beyond_done = None

        # Chaser Initial State
        chaser_ini_pos = np.array([8, -50, 5]) + np.random.uniform(-0.3, 0.3, (3,))
        chaser_ini_vel = np.array([0, 0, 0])  # + np.random.uniform(-0.1, 0.1, (3,))
        chaser_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))  # + np.random.uniform(-0.2, 0.2, (3,)))
        chaser_ini_angular_rate = np.array([0.0, 0.0, 0.0])  # + np.random.uniform(-0.1, 0.1, (3,))
        self.chaser_dock_port = np.array([0.1, 0.0, 0.0])
        self.chaser_ini_state = np.zeros(13)
        self.chaser_ini_state[0:3] = chaser_ini_pos
        self.chaser_ini_state[3:6] = chaser_ini_vel
        self.chaser_ini_state[6:10] = chaser_ini_att
        self.chaser_ini_state[10:] = chaser_ini_angular_rate
        self.state_chaser = self.chaser.reset(self.chaser_ini_state, self.chaser_dock_port)

        # Target Initial State
        target_ini_pos = np.array([10, -50, 5])
        target_ini_vel = np.array([0.0, 0.0, 0.0])
        target_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))
        target_ini_angular_rate = np.array([0.0, 0.0, 0.0])
        self.target_dock_port = np.array([-0.1, 0, 0])
        self.target_ini_state = np.zeros(13)
        self.target_ini_state[0:3] = target_ini_pos
        self.target_ini_state[3:6] = target_ini_vel
        self.target_ini_state[6:10] = target_ini_att
        self.target_ini_state[10:] = target_ini_angular_rate
        self.state_target = self.target.reset(self.target_ini_state, self.target_dock_port)

        # Target Final State
        target_pos_des = np.array([10, -50, 5])  # [x, y, z]
        target_att_des = euler2quat(np.array([0.0, 0.0, 0.0]))
        self.target_state_des = np.zeros(13)
        self.target_state_des[0:3] = target_pos_des
        self.target_state_des[6:10] = target_att_des

        # Final Relative Error
        self.rel_pos_threshold = 1
        self.rel_vel_threshold = 0.1
        self.rel_att_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])
        self.rel_att_rate_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])

        # chaser_dp = self.chaser.get_dock_port_state()  # drone A
        # target_dp = self.target.get_dock_port_state()  # drone B
        self.rel_state = state2rel(self.state_chaser, self.state_target, self.chaser.get_dock_port_state(),
                                   self.target.get_dock_port_state())

        # State Limitation
        chaser_low = self.chaser.state_lim_low
        chaser_high = self.chaser.state_lim_high

        target_low = self.target.state_lim_low
        target_high = self.target.state_lim_high

        # obs rel info: 12x1 [rel_pos, rel_vel, rel_rpy, rel_rpy_rate]
        self.obs_low = np.array(
            [-np.inf, -np.inf, -np.inf, -100, -100, -100, -np.pi, -np.pi / 2, -np.pi, -10 * np.pi, -10 * np.pi,
             -10 * np.pi])
        self.obs_high = np.array(
            [np.inf, np.inf, np.inf, 100, 100, 100, np.pi, np.pi / 2, np.pi, 10 * np.pi, 10 * np.pi, 10 * np.pi])

        # rel_low = np.array([60, 0, 100, 10, 10, 10, 1, 1, 1, 1, 10 * 2 * np.pi, 10 * 2 * np.pi, 10 * 2 * np.pi])

        self.action_space = spaces.Box(low=np.array([-1.0, -1.0, -1.0, -1.0]), high=np.array([1.0, 1.0, 1.0, 1.0]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=self.obs_low, high=self.obs_high, dtype=np.float32)

        # self.action_max = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity
        # Map the normalized action in [-1, 1] to a per-rotor thrust in [0, m*g]:
        # thrust = action_std * action + action_mean (see the step() examples below).
        self.action_mean = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0
        self.action_std = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0

        self.seed()
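A short interaction sketch (assumed; the environment class name DockingEnv is hypothetical, and the gym-style reset/step interface is inferred from the spaces defined above):

# Hypothetical usage; the class name is an assumption, not from the original source.
env = DockingEnv()
obs = env.reset()
for _ in range(200):
    action = env.action_space.sample()       # random normalized rotor commands in [-1, 1]
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()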
Example #4
    def step(self, action):
        self.t += 1

        old_state_target = self.state_target
        old_state_chaser = self.state_chaser
        old_rel_state = self.rel_state
        old_chaser_dp = self.chaser.get_dock_port_state()

        action_chaser = self.chaser.rotor2control @ (self.action_std * action[:] + self.action_mean)
        # action_chaser = self.chaser.rotor2control @ (self.action_max * action[:])

        # action_target = action[4:]
        action_target = self.target_controller.PID(self.target_state_des, self.state_target)
        self.state_target = self.target.step(action_target)
        self.state_chaser = self.chaser.step(action_chaser)

        # dock port relative state
        chaser_dp = self.chaser.get_dock_port_state()  # drone A
        target_dp = self.target.get_dock_port_state()  # drone B

        self.rel_state = state2rel(self.state_chaser, self.state_target, chaser_dp, target_dp)
        # done_final = False
        # done_overlimit = False
        flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                            and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                            and (np.abs(self.rel_state[6]) < deg2rad(10))
                            and (np.abs(self.rel_state[7]) < deg2rad(10))
                            and (np.abs(self.rel_state[8]) < deg2rad(10)))
        # and (np.linalg.norm(self.rel_state[9:], 2) < deg2rad(10))
        # and (np.abs(self.rel_state[6]) < (deg2rad(10.0)))
        # and (np.abs(self.rel_state[7]) < (deg2rad(10.0)))
        # # and (deg2rad(95.0) > np.abs(self.rel_state[8]) > deg2rad(85.0)))
        # and (np.abs(self.rel_state[8]) < deg2rad(10.0)))\

        done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 3)
                              or self.state_chaser[2] <= 0.1)
                              # or np.abs(self.rel_state[6]) > (deg2rad(85.0))
                              # or np.abs(self.rel_state[7]) > (deg2rad(85.0))
                              # or np.abs(self.rel_state[8]) > (deg2rad(175.0)))
        # or (np.linalg.norm(self.rel_state[3:6], 2) > 10))
        # or np.linalg.norm(self.rel_state[9:], 2) > 5 * np.pi)
        # done_overlimit = bool(self.state_chaser[2] <= 0.1)

        done_overtime = bool(self.t >= 600)

        # reward /= 1000.0
        self.done = bool(done_overlimit or done_overtime)  # alternative: self.done = bool(done_overlimit)

        # self.done = done_overlimit

        reward_docked = 0
        if flag_docking:
            reward_docked = +1.0
            # + (0.02-np.linalg.norm(self.rel_state[0:3], 2)) \
            # + (0.01-np.linalg.norm(self.rel_state[3:6], 2)) \
            # + 0.1*(deg2rad(20.0) - np.linalg.norm(self.rel_state[6:9])) \

        reward_action = np.linalg.norm(action[:], 2)

        self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 3.0))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                       - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                       - 0.1 * reward_action + 1.0 * reward_docked

        self.reward = self.shaping - self.last_shaping
        self.last_shaping = self.shaping

        # tbc (earlier reward logic, kept commented out):
        # if self.done:
        #     # self.state_chaser[2] = old_state_chaser[2]
        #     # self.state_chaser[3:] = self.chaser_ini_state[3:]
        #     # self.rel_state = state2rel(self.state_chaser, self.state_target, old_chaser_dp, target_dp)
        #     # self.reset()
        #     reward = -1000.0  # * np.sum(np.square(self.rel_state[0:3]))  # -0.01(x,j,tb16) -0.1(x,ent +inf,k,tb17) -10(x,l,z_overlimit) -100.0
        # elif (not flag_docking) and (not self.done):
        #     reward = - 0.002 * np.sum(np.square(self.rel_state[0:3] / 100)) \
        #              - 0.0002 * np.sum(np.square(self.rel_state[3:6] / 100)) \
        #              - 0.002 * np.sum(np.square(self.rel_state[6:9] / 10)) \
        #              - 0.0002 * np.sum(np.square(self.rel_state[9:] / 10)) \
        #              - 0.0002 * reward_action + 0.1
        #     # reward = -0.5 - 0.001 * np.abs(self.rel_state[3]) - 0.001 * np.abs(self.rel_state[4]) - 0.001 * np.abs(self.rel_state[5])
        # elif flag_docking:
        #     reward = reward_docked
        #     # - 0.01 * np.square(self.rel_state[0]) - 0.01 * np.square(self.rel_state[1]) - 0.01 * np.square(self.rel_state[2])
        #     # - 0.01 * np.square(self.rel_state[6]) - 0.01 * np.square(self.rel_state[7]) - 0.01 * np.square(self.rel_state[8]) - 0.01 * np.linalg.norm(self.rel_state[9:], 2)
        # else:
        #     reward = 0.0
        #     raise AssertionError('Wrong Reward Signal')

        # reward += 0.1 * self.t

        info = {'chaser': self.state_chaser,
                'target': self.state_target,
                'flag_docking': flag_docking,
                'done_overlimit': done_overlimit}

        return self.rel_state, self.reward, self.done, info
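The per-step reward above is the difference between consecutive shaping values, i.e. potential-based shaping: the agent is rewarded for improving the relative state rather than for its absolute value. A toy illustration with made-up numbers:

# Illustrative only (hypothetical values): the reward is the change in shaping.
last_shaping = -5.0                 # shaping potential after the previous step
shaping = -4.5                      # shaping potential after the current step
reward = shaping - last_shaping     # 0.5 > 0: the relative state improved this step
last_shaping = shaping              # carried over for the next step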
Example #5
from dynamics.quadrotor import Drone
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, quat2euler, euler2quat
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# print(rot2quat(euler2rot(np.array([0, 0, 0]))))
ini_pos = np.array([8, -50, 5])
ini_vel = np.array([0, 0, 0])
ini_att = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
ini_angular_rate = np.array([0, 0, 0])
ini_state = np.zeros(13)
ini_state[0:3] = ini_pos
ini_state[3:6] = ini_vel
ini_state[6:10] = ini_att
ini_state[10:] = ini_angular_rate

pos_des = np.array([10, -50, 5])  # [x, y, z]
vel_des = np.array([0, 0, 0])
att_des = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[3:6] = vel_des
state_des[6:10] = att_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state)

control = controller(quad1.get_arm_length(), quad1.get_mass())
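The example stops right after constructing the drone and the PID controller. A minimal closed-loop continuation sketch (an assumption, reusing only calls that appear in the other examples: controller.PID(state_des, state) and Drone.step(action)):

# Hypothetical continuation of the script above (not part of the original example).
state = ini_state
history = []
for k in range(1000):
    action = control.PID(state_des, state)   # control command from the PID controller
    state = quad1.step(action)               # propagate the quadrotor dynamics one step
    history.append(state.copy())

history = np.array(history)
plt.plot(history[:, 0], label='x')
plt.plot(history[:, 1], label='y')
plt.plot(history[:, 2], label='z')
plt.legend()
plt.show()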
Example #6
    def step(self, action):
        # last_reward = self.reward
        # reward = 0.0
        # shaping = 0.0
        self.t += 1

        old_state_target = self.info['target']  # self.state_target
        old_state_chaser = self.state_chaser
        old_rel_state = self.rel_state
        old_chaser_dp = self.chaser.get_dock_port_state()

        action_chaser = self.chaser.rotor2control @ (
            self.action_std * action[:] + self.action_mean)
        # action_chaser = self.chaser.rotor2control @ (self.action_max * action[:])

        # action_target = action[4:]
        action_target = self.target_controller.vel_controller(
            self.target_state_des, self.state_target, old_state_target)
        self.state_target = self.target.step(action_target)
        self.state_chaser = self.chaser.step(action_chaser)

        # dock port relative state
        chaser_dp = self.chaser.get_dock_port_state()  # drone A
        target_dp = self.target.get_dock_port_state()  # drone B

        self.rel_state = state2rel(self.state_chaser, self.state_target,
                                   chaser_dp, target_dp)
        # done_final = False
        # done_overlimit = False
        flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                            and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                            and (np.abs(self.rel_state[6]) < deg2rad(10))
                            and (np.abs(self.rel_state[7]) < deg2rad(10))
                            and (np.abs(self.rel_state[8]) < deg2rad(10)))
        # and (np.linalg.norm(self.rel_state[9:], 2) < deg2rad(10))
        # and (np.abs(self.rel_state[6]) < (deg2rad(10.0)))
        # and (np.abs(self.rel_state[7]) < (deg2rad(10.0)))
        # # and (deg2rad(95.0) > np.abs(self.rel_state[8]) > deg2rad(85.0)))
        # and (np.abs(self.rel_state[8]) < deg2rad(10.0)))\

        done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 10)
                              or self.state_chaser[2] <= 0.1)
        # (np.linalg.norm(self.rel_state[0:3]) >= 3)
        #                   or self.state_chaser[2] <= 0.1)
        # or np.abs(self.rel_state[6]) > (deg2rad(85.0))
        # or np.abs(self.rel_state[7]) > (deg2rad(85.0))
        # or np.abs(self.rel_state[8]) > (deg2rad(175.0)))
        # or (np.linalg.norm(self.rel_state[3:6], 2) > 10))
        # or np.linalg.norm(self.rel_state[9:], 2) > 5 * np.pi)
        # done_overlimit = bool(self.state_chaser[2] <= 0.1)

        done_overtime = bool(self.t >= 600)

        # reward /= 1000.0
        self.done = bool(
            done_overlimit
            or done_overtime)  # alternative: self.done = bool(done_overlimit)

        # self.done = done_overlimit

        reward_docked = 0
        if flag_docking:
            reward_docked = +1.0
            # + (0.02-np.linalg.norm(self.rel_state[0:3], 2)) \
            # + (0.01-np.linalg.norm(self.rel_state[3:6], 2)) \
            # + 0.1*(deg2rad(20.0) - np.linalg.norm(self.rel_state[6:9])) \

        reward_action = np.linalg.norm(action[:], 2)
        # reward_action = np.sum(np.abs(action[:]))

        self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 10))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                       - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                       - 0.1 * reward_action + 1.0 * reward_docked

        self.reward = self.shaping - self.last_shaping
        self.last_shaping = self.shaping

        # reward += 0.1 * self.t

        self.info = {
            'chaser': self.state_chaser,
            'target': self.state_target,
            'flag_docking': flag_docking,
            'done_overlimit': done_overlimit
        }

        return self.rel_state, self.reward, self.done, self.info
Example #7
from dynamics.quadrotor import Drone
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, quat2euler, euler2quat
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from controller.JoystickController import RCInput
import threading

# print(rot2quat(euler2rot(np.array([0, 0, 0]))))
ini_pos = np.array([0, 0, 0])
ini_att = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
ini_angular_rate = np.array([0, 0, deg2rad(0)])
ini_state = np.zeros(13)
ini_state[0:3] = ini_pos
ini_state[6:10] = ini_att
ini_state[10:] = ini_angular_rate

pos_des = np.array([0, 0, 0])  # [x, y, z]
att_des = np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)])
angular_rate_des = np.array([0, 0, deg2rad(0)])
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = euler2quat(att_des)
state_des[10:] = angular_rate_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state)

control = controller(quad1.get_arm_length(), quad1.get_mass())
Example #8
    def step(self, action):
        # last_reward = self.reward
        # reward = 0.0
        # shaping = 0.0
        self.t += 1

        old_state_target = self.state_target
        old_state_chaser = self.state_chaser
        old_rel_state = self.rel_state
        old_chaser_dp = self.chaser.get_dock_port_state()

        action_chaser = self.chaser.rotor2control @ (self.action_std * action[:] + self.action_mean)
        # action_chaser = self.chaser.rotor2control @ (self.action_max * action[:])

        action_target = self.target_controller.PID(self.target_state_des, self.state_target)
        self.state_target = self.target.step(action_target)
        self.state_chaser = self.chaser.step(action_chaser)

        self.chaser_pub_srv.send_state(int(self.t), self.state_chaser)
        self.target_pub_srv.send_state(int(self.t), self.state_target)

        # Grab the full screen, downsample it, and use the resized image as the observation.
        img = ImageGrab.grab([0, 0, 1920, 1080])
        # img.convert('L')
        # time.sleep(0.1)
        resize_img = img.resize((320, 240), Image.ANTIALIAS)  # Image.LANCZOS in newer Pillow
        self.obs = np.array(resize_img)

        # dock port relative state
        chaser_dp = self.chaser.get_dock_port_state()  # drone A
        target_dp = self.target.get_dock_port_state()  # drone B

        self.rel_state = state2rel(self.state_chaser, self.state_target, chaser_dp, target_dp)
        # done_final = False
        # done_overlimit = False
        flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                            and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                            and (np.abs(self.rel_state[6]) < deg2rad(10))
                            and (np.abs(self.rel_state[7]) < deg2rad(10))
                            and (np.abs(self.rel_state[8]) < deg2rad(10)))

        done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 3)
                              or self.state_chaser[2] <= 0.1)

        done_overtime = bool(self.t >= 600)

        self.done = bool(done_overlimit or done_overtime)

        reward_docked = 0
        if flag_docking:
            reward_docked = +1.0

        reward_action = np.linalg.norm(action[:], 2)

        self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 3.0))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                       - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                       - 0.1 * reward_action + 1.0 * reward_docked

        self.reward = self.shaping - self.last_shaping
        self.last_shaping = self.shaping

        # reward += 0.1 * self.t

        info = {'chaser': self.state_chaser,
                'target': self.state_target,
                'flag_docking': flag_docking,
                'done_overlimit': done_overlimit}

        return self.obs, self.reward, self.done, info
Example #9
from dynamics.quadrotor import Drone
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# print(rot2quat(euler2rot(np.array([0, 0, 0]))))
ini_pos = np.array([0, 0, 10])
ini_att = rot2quat(euler2rot(np.array([deg2rad(0), deg2rad(0), 0])))
ini_angular_rate = np.array([0, deg2rad(0), 0])

ini_state_qd1 = np.zeros(13)
ini_state_qd1[0:3] = ini_pos
ini_state_qd1[6:10] = ini_att
ini_state_qd1[10:] = ini_angular_rate

ini_state_qd2 = np.zeros(13)
ini_state_qd2[0:3] = ini_pos
ini_state_qd2[6:10] = ini_att
ini_state_qd2[10:] = ini_angular_rate

att_des = rot2quat(euler2rot(np.array([deg2rad(0), deg2rad(0), deg2rad(0)])))
pos_des = np.array([0.1, 0, 8.2])  # [x, y, z]
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = att_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state_qd1)
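The snippet defines an initial state for a second quadrotor (ini_state_qd2) but only resets the first. A hedged continuation sketch (assumed; it reuses only calls seen in the other examples):

# Hypothetical continuation (not part of the original example).
quad2 = Drone()
quad2.reset(ini_state_qd2)

control = controller(quad1.get_arm_length(), quad1.get_mass())
state = ini_state_qd1
for k in range(500):
    action = control.PID(state_des, state)   # PID command toward state_des
    state = quad1.step(action)               # advance quad1 one simulation step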
Example #10
import gym
from gym import wrappers, logger
import numpy as np
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, euler2quat

att_des = euler2quat(np.array([deg2rad(0), deg2rad(0), deg2rad(0)]))
pos_des = np.array([0, 0, 1])  # [x, y, z]
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = att_des
control = controller(0.086, 0.18)  # (arm length, mass), as in controller(get_arm_length(), get_mass())

env = gym.make('gym_docking:hovering-v0')

env.seed(0)
obs = env.reset()
for i in range(1000):
    action = control.PID(state_des, obs)
    obs, reward, done, info = env.step(action)
    # print('obs: ', obs)
    print('reward: ', reward)
    print('done: ', done)
    if done:
        obs = env.reset()