Esempio n. 1
0
    def check_distance(self,
                       r0,
                       v0,
                       t_start,
                       t_end,
                       safe_radius=RADIUS_SATURN * 2):
        """ Is the periapsis closer than  safe radius, and if so are we passing periapsis? """

        # get orbital parameters
        a, e, _, _, _, E0 = ic2par(r0, v0, self.common_mu)
        rp = a * (1 - e)

        if rp > safe_radius:  # we are safe if periapsis is large enough
            return False
        else:
            # now we need to check if we actually pass the periapsis in this conic

            # propagate flight
            delta_t = t_end - t_start
            r, v = propagate_lagrangian(r0, v0, delta_t * DAY2SEC,
                                        self.common_mu)

            # get current orbital period [days]
            period = 2 * pi * (a**3 / self.common_mu)**.5 / DAY2SEC

            # calculate new anomaly
            _, _, _, _, _, E = ic2par(r, v, self.common_mu)

            # check if we actually pass close to planet
            return self.periapsis_passed(E0, E, delta_t, period)
    def pretty(self, x):
        # 1 -  we 'decode' the chromosome recording the various deep space
        # manouvres timing (days) in the list T
        T = list([0] * (self.N_max - 1))

        for i in range(len(T)):
            T[i] = log(x[2 + 4 * i])
        total = sum(T)
        T = [x[1] * time / total for time in T]

        # 2 - We compute the starting and ending position
        r_start, v_start = self.start.eph(epoch(x[0]))
        if self.phase_free:
            r_target, v_target = self.target.eph(epoch(x[-1]))
        else:
            r_target, v_target = self.target.eph(epoch(x[0] + x[1]))

        # 3 - We loop across inner impulses
        rsc = r_start
        vsc = v_start
        for i, time in enumerate(T[:-1]):
            theta = 2 * pi * x[3 + 4 * i]
            phi = acos(2 * x[4 + 4 * i] - 1) - pi / 2

            Vinfx = x[5 + 4 * i] * cos(phi) * cos(theta)
            Vinfy = x[5 + 4 * i] * cos(phi) * sin(theta)
            Vinfz = x[5 + 4 * i] * sin(phi)

            # We apply the (i+1)-th impulse
            vsc = [a + b for a, b in zip(vsc, [Vinfx, Vinfy, Vinfz])]
            rsc, vsc = propagate_lagrangian(
                rsc, vsc, T[i] * DAY2SEC, self.__common_mu)
        cw = (ic2par(rsc, vsc, self.start.mu_central_body)[2] > pi / 2)

        # We now compute the remaining two final impulses
        # Lambert arc to reach seq[1]
        dt = T[-1] * DAY2SEC
        l = lambert_problem(rsc, r_target, dt, self.__common_mu, cw, False)
        v_end_l = l.get_v2()[0]
        v_beg_l = l.get_v1()[0]

        DV1 = norm([a - b for a, b in zip(v_beg_l, vsc)])
        DV2 = norm([a - b for a, b in zip(v_end_l, v_target)])

        DV_others = list(x[5::4])
        DV_others.extend([DV1, DV2])

        print("Total DV (m/s): ", sum(DV_others))
        print("Dvs (m/s): ", DV_others)
        print("Tofs (days): ", T)
Esempio n. 3
0
    def fitness(self, x):
        # 1 -  we 'decode' the chromosome into the various deep space
        # manouvres times (days) in the list T
        T = list([0] * (self.N_max - 1))

        for i in range(len(T)):
            T[i] = log(x[2 + 4 * i])
        total = sum(T)
        T = [x[1] * time / total for time in T]

        # 2 - We compute the starting and ending position
        r_start, v_start = self.start.eph(epoch(x[0]))
        if self.phase_free:
            r_target, v_target = self.target.eph(epoch(x[-1]))
        else:
            r_target, v_target = self.target.eph(epoch(x[0] + x[1]))

        # 3 - We loop across inner impulses
        rsc = r_start
        vsc = v_start
        for i, time in enumerate(T[:-1]):
            theta = 2 * pi * x[3 + 4 * i]
            phi = acos(2 * x[4 + 4 * i] - 1) - pi / 2

            Vinfx = x[5 + 4 * i] * cos(phi) * cos(theta)
            Vinfy = x[5 + 4 * i] * cos(phi) * sin(theta)
            Vinfz = x[5 + 4 * i] * sin(phi)

            # We apply the (i+1)-th impulse
            vsc = [a + b for a, b in zip(vsc, [Vinfx, Vinfy, Vinfz])]
            rsc, vsc = propagate_lagrangian(
                rsc, vsc, T[i] * DAY2SEC, self.__common_mu)
        cw = (ic2par(rsc, vsc, self.start.mu_central_body)[2] > pi / 2)

        # We now compute the remaining two final impulses
        # Lambert arc to reach seq[1]
        dt = T[-1] * DAY2SEC
        l = lambert_problem(rsc, r_target, dt, self.__common_mu, cw, False)
        v_end_l = l.get_v2()[0]
        v_beg_l = l.get_v1()[0]

        DV1 = norm([a - b for a, b in zip(v_beg_l, vsc)])
        DV2 = norm([a - b for a, b in zip(v_end_l, v_target)])

        DV_others = sum(x[5::4])
        if self.obj_dim == 1:
            return (DV1 + DV2 + DV_others,)
        else:
            return (DV1 + DV2 + DV_others, x[1])
Esempio n. 4
0
    def plot(self, x, axes=None):
        """
        ax = prob.plot_trajectory(x, axes=None)

        - x: encoded trajectory
        - axes: matplotlib axis where to plot. If None figure and axis will be created
        - [out] ax: matplotlib axis where to plot

        Plots the trajectory represented by a decision vector x on the 3d axis ax

        Example::

          ax = prob.plot(x)
        """
        import matplotlib as mpl
        from mpl_toolkits.mplot3d import Axes3D
        import matplotlib.pyplot as plt
        from pykep.orbit_plots import plot_planet, plot_lambert, plot_kepler

        if axes is None:
            mpl.rcParams['legend.fontsize'] = 10
            fig = plt.figure()
            axes = fig.gca(projection='3d')

        axes.scatter(0, 0, 0, color='y')

        # 1 -  we 'decode' the chromosome recording the various deep space
        # manouvres timing (days) in the list T
        T = list([0] * (self.N_max - 1))

        for i in range(len(T)):
            T[i] = log(x[2 + 4 * i])
        total = sum(T)
        T = [x[1] * time / total for time in T]

        # 2 - We compute the starting and ending position
        r_start, v_start = self.start.eph(epoch(x[0]))
        if self.phase_free:
            r_target, v_target = self.target.eph(epoch(x[-1]))
        else:
            r_target, v_target = self.target.eph(epoch(x[0] + x[1]))
        plot_planet(self.start,
                    t0=epoch(x[0]),
                    color=(0.8, 0.6, 0.8),
                    legend=True,
                    units=AU,
                    ax=axes,
                    s=0)
        plot_planet(self.target,
                    t0=epoch(x[0] + x[1]),
                    color=(0.8, 0.6, 0.8),
                    legend=True,
                    units=AU,
                    ax=axes,
                    s=0)

        DV_list = x[5::4]
        maxDV = max(DV_list)
        DV_list = [s / maxDV * 30 for s in DV_list]
        colors = ['b', 'r'] * (len(DV_list) + 1)

        # 3 - We loop across inner impulses
        rsc = r_start
        vsc = v_start
        for i, time in enumerate(T[:-1]):
            theta = 2 * pi * x[3 + 4 * i]
            phi = acos(2 * x[4 + 4 * i] - 1) - pi / 2

            Vinfx = x[5 + 4 * i] * cos(phi) * cos(theta)
            Vinfy = x[5 + 4 * i] * cos(phi) * sin(theta)
            Vinfz = x[5 + 4 * i] * sin(phi)

            # We apply the (i+1)-th impulse
            vsc = [a + b for a, b in zip(vsc, [Vinfx, Vinfy, Vinfz])]
            axes.scatter(rsc[0] / AU,
                         rsc[1] / AU,
                         rsc[2] / AU,
                         color='k',
                         s=DV_list[i])
            plot_kepler(rsc,
                        vsc,
                        T[i] * DAY2SEC,
                        self.__common_mu,
                        N=200,
                        color=colors[i],
                        legend=False,
                        units=AU,
                        ax=axes)
            rsc, vsc = propagate_lagrangian(rsc, vsc, T[i] * DAY2SEC,
                                            self.__common_mu)

        cw = (ic2par(rsc, vsc, self.start.mu_central_body)[2] > pi / 2)
        # We now compute the remaining two final impulses
        # Lambert arc to reach seq[1]
        dt = T[-1] * DAY2SEC
        l = lambert_problem(rsc, r_target, dt, self.__common_mu, cw, False)
        plot_lambert(l,
                     sol=0,
                     color=colors[i + 1],
                     legend=False,
                     units=AU,
                     ax=axes,
                     N=200)
        v_end_l = l.get_v2()[0]
        v_beg_l = l.get_v1()[0]
        DV1 = norm([a - b for a, b in zip(v_beg_l, vsc)])
        DV2 = norm([a - b for a, b in zip(v_end_l, v_target)])

        axes.scatter(rsc[0] / AU,
                     rsc[1] / AU,
                     rsc[2] / AU,
                     color='k',
                     s=min(DV1 / maxDV * 30, 40))
        axes.scatter(r_target[0] / AU,
                     r_target[1] / AU,
                     r_target[2] / AU,
                     color='k',
                     s=min(DV2 / maxDV * 30, 40))

        return axes
Esempio n. 5
0
#Mission data

#Time-of-flight
tf = t_nom[-1] - t_nom[0]  #s, Time-of-flight
dt = tf / NSTEPS  #s, time-step

#Reference initial state
m0 = m_nom[0]  #kg, initial spacecraft mass
r0 = [rx_nom[0], ry_nom[0], rz_nom[0]]  #km, initial spacecraft position
v0 = [vx_nom[0], vy_nom[0], vz_nom[0]]  #km/s, initial spacecraft velocity

#Target initial state
rTf = [rx_nom[-1], ry_nom[-1], rz_nom[-1]]  #km, final target position
vTf = [vx_nom[-1], vy_nom[-1], vz_nom[-1]]  #km/s, final target velocity
coeT = ic2par(r=rTf, v=vTf)  #orbital elements of the target
PT = 2. * np.pi * np.sqrt(coeT[0]**3)  #s, target orbital period
rT0, vT0 = propagate_lagrangian(
    r0=rTf, v0=vTf,
    tof=PT - tf)  #km, km/s, initial target position and velocity

#Epsilon constraint schedule
eps_schedule = [eps_schedule[-1]]

# Environment creation
env0 = gym.make(id=env_name, \
            impulsive=impulsive, action_coord=action_coord, obs_type=obs_type, \
            random_obs=False, stochastic=False, mission_type=mission_type, \
            NSTEPS=NSTEPS, NITER=NSTEPS, \
            eps_schedule=eps_schedule, lambda_con=lambda_con, \
            Tmax=Tmax, ueq=ueq, tf=tf, amu=1., m0=m0, \
Esempio n. 6
0
    def plot(self, x, axes=None):
        """
        ax = prob.plot_trajectory(x, axes=None)

        - x: encoded trajectory
        - axes: matplotlib axis where to plot. If None figure and axis will be created
        - [out] ax: matplotlib axis where to plot

        Plots the trajectory represented by a decision vector x on the 3d axis ax

        Example::

          ax = prob.plot(x)
        """
        import matplotlib as mpl
        from mpl_toolkits.mplot3d import Axes3D
        import matplotlib.pyplot as plt
        from pykep.orbit_plots import plot_planet, plot_lambert, plot_kepler

        if axes is None:
            mpl.rcParams['legend.fontsize'] = 10
            fig = plt.figure()
            axes = fig.gca(projection='3d')

        axes.scatter(0, 0, 0, color='y')

        # 1 -  we 'decode' the chromosome recording the various deep space
        # manouvres timing (days) in the list T
        T = list([0] * (self.N_max - 1))

        for i in range(len(T)):
            T[i] = log(x[2 + 4 * i])
        total = sum(T)
        T = [x[1] * time / total for time in T]

        # 2 - We compute the starting and ending position
        r_start, v_start = self.start.eph(epoch(x[0]))
        if self.phase_free:
            r_target, v_target = self.target.eph(epoch(x[-1]))
        else:
            r_target, v_target = self.target.eph(epoch(x[0] + x[1]))
        plot_planet(self.start, t0=epoch(x[0]), color=(
            0.8, 0.6, 0.8), legend=True, units=AU, ax=axes, s=0)
        plot_planet(self.target, t0=epoch(
            x[0] + x[1]), color=(0.8, 0.6, 0.8), legend=True, units=AU, ax=axes, s=0)

        DV_list = x[5::4]
        maxDV = max(DV_list)
        DV_list = [s / maxDV * 30 for s in DV_list]
        colors = ['b', 'r'] * (len(DV_list) + 1)

        # 3 - We loop across inner impulses
        rsc = r_start
        vsc = v_start
        for i, time in enumerate(T[:-1]):
            theta = 2 * pi * x[3 + 4 * i]
            phi = acos(2 * x[4 + 4 * i] - 1) - pi / 2

            Vinfx = x[5 + 4 * i] * cos(phi) * cos(theta)
            Vinfy = x[5 + 4 * i] * cos(phi) * sin(theta)
            Vinfz = x[5 + 4 * i] * sin(phi)

            # We apply the (i+1)-th impulse
            vsc = [a + b for a, b in zip(vsc, [Vinfx, Vinfy, Vinfz])]
            axes.scatter(rsc[0] / AU, rsc[1] / AU, rsc[2] /
                         AU, color='k', s=DV_list[i])
            plot_kepler(rsc, vsc, T[i] * DAY2SEC, self.__common_mu,
                        N=200, color=colors[i], legend=False, units=AU, ax=axes)
            rsc, vsc = propagate_lagrangian(
                rsc, vsc, T[i] * DAY2SEC, self.__common_mu)

        cw = (ic2par(rsc, vsc, self.start.mu_central_body)[2] > pi / 2)
        # We now compute the remaining two final impulses
        # Lambert arc to reach seq[1]
        dt = T[-1] * DAY2SEC
        l = lambert_problem(rsc, r_target, dt, self.__common_mu, cw, False)
        plot_lambert(l, sol=0, color=colors[
                     i + 1], legend=False, units=AU, ax=axes, N=200)
        v_end_l = l.get_v2()[0]
        v_beg_l = l.get_v1()[0]
        DV1 = norm([a - b for a, b in zip(v_beg_l, vsc)])
        DV2 = norm([a - b for a, b in zip(v_end_l, v_target)])

        axes.scatter(rsc[0] / AU, rsc[1] / AU, rsc[2] / AU,
                     color='k', s=min(DV1 / maxDV * 30, 40))
        axes.scatter(r_target[0] / AU, r_target[1] / AU,
                     r_target[2] / AU, color='k', s=min(DV2 / maxDV * 30, 40))

        return axes
Esempio n. 7
0
    def reset(self):
        """
        :return obs: observation vector

        """

        # Environment variables
        self.rk = self.r0  # position at the k-th time step, km
        self.vkm = self.v0  # velocity at the k-th time step, before DV, km/s
        self.mk = self.m0  # mass at the k-th time step, kg
        self.mpk = 0.  # propellant mass expelled at the k-th time step
        self.tk = 0.  # k-th time step
        self.tk_left = self.NSTEPS  # steps-to-go
        self.t = 0.  # time from departure
        self.t_left = self.tf  # time-to-go

        if (self.stochastic == True):
            #Select the segment with MTE
            pr = np_uniform(0., 1.)
            if (self.MTE == True) and (pr < 1):
                self.tk_mte = np_randint(0, int(self.NSTEPS))
                self.n_mte = 0  #number of MTEs so far
            else:
                self.tk_mte = -1

        # Reset parameters
        self.sol = {
            'rx': [],
            'ry': [],
            'rz': [],
            'vx': [],
            'vy': [],
            'vz': [],
            'ux': [],
            'uy': [],
            'uz': [],
            'm': [],
            't': []
        }
        self.steps_beyond_done = 0
        self.done = False

        rk_obs = self.rk
        vkm_obs = self.vkm

        # Observations
        if self.obs_type == 0:
            obs = np.array([rk_obs[0], rk_obs[1], rk_obs[2], \
                vkm_obs[0], vkm_obs[1], vkm_obs[2], \
                self.mk, self.t]).astype(np.float64)
        elif self.obs_type == 1:
            coesk = ic2par(r=rk_obs, v=vkm_obs, mu=self.amu)
            obs = np.array([coesk[0], coesk[1], coesk[2]/np.pi, \
                coesk[3]/(2*np.pi), coesk[4]/(2*np.pi), coesk[5]/(2*np.pi), \
                self.mk, self.t]).astype(np.float64)
        elif self.obs_type == 2:
            meesk = ic2eq(r=rk_obs, v=vkm_obs, mu=self.amu)
            obs = np.array([meesk[0], meesk[1], meesk[2], \
                meesk[3], meesk[4], meesk[5]/(2*np.pi), \
                self.mk, self.t]).astype(np.float64)
        else:
            obs = np.array([rk_obs[0], rk_obs[1], rk_obs[2], \
                vkm_obs[0], vkm_obs[1], vkm_obs[2], \
                self.mk, self.t, 0., 0., 0.]).astype(np.float64)

        return obs
Esempio n. 8
0
    def step(self, action):
        """
        :param action: current action
        :return obs, reward, done, info

        """
        # Invalid action
        assert self.action_space.contains(
            action), "%r (%s) invalid" % (action, type(action))

        # Perturbate the action
        if self.stochastic == True:
            action = self.perturbAction(action)

        # State at next time step and current control
        rk1, vk1m, mk1, uk = self.propagation_step(action, self.dt)

        # Perturbate the state
        if self.stochastic == True and self.tk < self.NSTEPS - 1:
            drk, dvk = self.stateErrors()
            rk1 = rk1 + drk
            vk1m = vk1m + dvk

        # Info (state at the beginning of the segment)
        self.sol['rx'] = self.rk[0]
        self.sol['ry'] = self.rk[1]
        self.sol['rz'] = self.rk[2]
        self.sol['vx'] = self.vkm[0]
        self.sol['vy'] = self.vkm[1]
        self.sol['vz'] = self.vkm[2]
        self.sol['ux'] = uk[0]
        self.sol['uy'] = uk[1]
        self.sol['uz'] = uk[2]
        self.sol['m'] = self.mk
        self.sol['t'] = self.t
        info = self.sol

        # Update the spacecraft state
        self.rk = rk1
        self.vkm = vk1m
        self.mpk = self.mk - mk1
        self.mk = mk1
        self.tk += 1.
        self.tk_left -= 1.
        self.t = self.tk * self.dt
        self.t_left = self.tk_left * self.dt

        #Errors on observations
        rk_obs = self.rk
        vkm_obs = self.vkm
        if self.random_obs == True:
            drk, dvk = self.obsErrors()
            rk_obs = rk_obs + drk
            vkm_obs = vkm_obs + dvk

        # Observations
        if self.obs_type == 0:
            obs = np.array([rk_obs[0], rk_obs[1], rk_obs[2], \
                vkm_obs[0], vkm_obs[1], vkm_obs[2], \
                self.mk, self.t]).astype(np.float64)
        elif self.obs_type == 1:
            coesk = ic2par(r=rk_obs, v=vkm_obs, mu=self.amu)
            obs = np.array([coesk[0], coesk[1], coesk[2]/np.pi, \
                coesk[3]/(2*np.pi), coesk[4]/(2*np.pi), coesk[5]/(2*np.pi), \
                self.mk, self.t]).astype(np.float64)
        elif self.obs_type == 2:
            meesk = ic2eq(r=rk_obs, v=vkm_obs, mu=self.amu)
            obs = np.array([meesk[0], meesk[1], meesk[2], \
                meesk[3], meesk[4], meesk[5]/(2*np.pi), \
                self.mk, self.t]).astype(np.float64)
        else:
            obs = np.array([rk_obs[0], rk_obs[1], rk_obs[2], \
                vkm_obs[0], vkm_obs[1], vkm_obs[2], \
                self.mk, self.t, action[0], action[1], action[2]]).astype(np.float64)

        # Update training steps
        self.training_steps += 1.

        # Episode termination
        done = (self.isDone() or self.safeStop(self.rk, self.vkm, self.mk))

        # Reward
        reward = self.getReward(done, action)

        return obs, float(reward), done, info
Esempio n. 9
0
    def __init__(self, impulsive, action_coord, obs_type, \
        random_obs, stochastic, mission_type,
        NSTEPS, NITER, \
        eps_schedule, lambda_con, \
        Tmax, ueq, tf, amu, m0, \
        r0, v0, \
        rTf, vTf, \
        sigma_r, sigma_v, \
        sigma_u_rot, sigma_u_norm, \
        MTE, pr_MTE):

        super(LowThrustEnv, self).__init__()
        """ Class attributes """
        self.impulsive = bool(impulsive)
        self.action_coord = bool(action_coord)
        self.obs_type = int(obs_type)
        self.random_obs = bool(random_obs)
        self.stochastic = bool(stochastic)
        self.mission_type = bool(mission_type)
        self.NSTEPS = float(NSTEPS)
        self.NITER = float(NITER)
        self.eps_schedule = array(eps_schedule)
        self.lambda_con = float(lambda_con)
        self.Tmax = float(Tmax)
        self.ueq = float(ueq)
        self.tf = float(tf)
        self.amu = float(amu)
        self.m0 = float(m0)
        self.r0 = array(r0)
        self.v0 = array(v0)
        self.rTf = array(rTf)
        self.vTf = array(vTf)
        self.sigma_r = float(sigma_r)  # 6.6845e-15, 6.6845e-9
        self.sigma_v = float(sigma_v)  # 1.6787e-4, 1.6787e-3
        self.sigma_u_rot = float(sigma_u_rot)  # 1 deg
        self.sigma_u_norm = float(sigma_u_norm)  # 0.1
        self.MTE = bool(MTE)
        self.pr_MTE = float(pr_MTE)  # 0.025
        """ Time variables """
        self.dt = self.tf / self.NSTEPS  # time step, s
        self.training_steps = 0.  # number of training steps
        self.mdot_max = sqrt(
            3.) * self.Tmax / self.ueq  # maximum mass flow rate
        """ Environment boundaries """
        coe0 = ic2par(
            r=self.r0, v=self.v0,
            mu=self.amu)  # initial classical orbital element of the S/C
        coeT = ic2par(r=self.rTf, v=self.vTf,
                      mu=self.amu)  # classical orbital element of the target
        rpT = coeT[0] * (1 - coeT[1])  # periapsis radius of the target, km
        raT = coeT[0] * (1 + coeT[1])  # apoapsis radius of the target, km
        vpT = sqrt(
            self.amu *
            (2. / rpT - 1 / coeT[0]))  # periapsis velocity of the target, km/s
        self.min_r = 0.1 * min(norm(self.r0), rpT)  # minimum radius, km
        self.max_r = 4. * max(norm(self.r0), raT)  # maximum radius, km
        self.max_v = 4. * max(norm(self.v0), vpT)  # maximum velocity, km/s
        self.max_a = 4. * max(coe0[0], coeT[0])  # maximum semi-major axis, km
        self.max_p = 4.*max(coe0[0]*(1-coe0[1]**2), \
            coeT[0]*(1-coeT[1]**2))                               # maximum semi-major axis, km
        self.max_mte = 3  # maximum number of MTEs
        """ OBSERVATION SPACE """
        if self.obs_type == 0:
            # Lower bounds
            x_lb = np.array([-self.max_r, -self.max_r, -self.max_r, \
                -self.max_v, -self.max_v, -self.max_v, \
                0., 0.])
            # Upper bounds
            x_ub = np.array([+self.max_r, +self.max_r, +self.max_r, \
                +self.max_v, +self.max_v, +self.max_v, \
                1., 1.])
        elif self.obs_type == 1:
            # Lower bounds
            x_lb = np.array([0., 0., 0., \
                0., 0., 0., \
                0., 0.])
            # Upper bounds
            x_ub = np.array([self.max_a, 1., 1., \
                1., 1., 1., \
                1., 1.])
        elif self.obs_type == 2:
            # Lower bounds
            x_lb = np.array([0., -1., -1., \
                -1., -1., 0., \
                0., 0.])
            # Upper bounds
            x_ub = np.array([self.max_p, 1., 1., \
                1., 1., 1., \
                1., 1.])
        else:
            # Lower bounds
            x_lb = np.array([-self.max_r, -self.max_r, -self.max_r, \
                -self.max_v, -self.max_v, -self.max_v, \
                0., 0., -1., -1., -1.])
            # Upper bounds
            x_ub = np.array([+self.max_r, +self.max_r, +self.max_r, \
                +self.max_v, +self.max_v, +self.max_v, \
                1., 1., 1., 1., 1.])

        self.observation_space = spaces.Box(x_lb, x_ub, dtype=np.float64)
        """ ACTION ASPACE """
        # Lower bounds
        a_lb = np.array([-1., -1., -1.])
        # Upper bounds
        a_ub = np.array([1., 1., 1.])

        self.action_space = spaces.Box(a_lb, a_ub, dtype=np.float64)
        """ Environment initialization """
        self.viewer = None
        self.state = None