def __init__(self):
    self.my_heli = Helicopter()
    self.my_contr = Controller()
    self.ENV_ID = "CustomEnv-v0"
    # ENV_ID = "CartPole-v0"
    self.env = gym.make(self.ENV_ID)
    self.env.current_states = self.env.reset()
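
Calling gym.make("CustomEnv-v0") only works if the environment has been registered with Gym beforehand. A minimal registration sketch, assuming the CustomEnv class shown in Example #3 below lives at a hypothetical module path env.CustomEnv, could look like this:

# Hypothetical registration sketch; the entry_point module path is an assumption,
# not something given in the source.
from gym.envs.registration import register

register(
    id="CustomEnv-v0",
    entry_point="env.CustomEnv:CustomEnv",  # assumed location of the CustomEnv class
)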
Example #3
class CustomEnv(gym.Env, ABC):
    def __init__(self):
        self.U_input = [U1, U2, U3, U4] = sp.symbols("U1:5", real=True)
        self.x_state = [
            u_velocity,
            v_velocity,
            w_velocity,
            p_angle,
            q_angle,
            r_angle,
            fi_angle,
            theta_angle,
            si_angle,
            xI,
            yI,
            zI,
            a_flapping,
            b_flapping,
            c_flapping,
            d_flapping,
        ] = sp.symbols("x1:17", real=True)
        self.My_helicopter = Helicopter()
        self.My_controller = Controller()
        self.t = sp.symbols("t")
        self.symbolic_states_math, jacobian = self.My_helicopter.lambd_eq_maker(self.t, self.x_state, self.U_input)
        self.default_range = default_range = (-20, 20)
        self.observation_space_domain = {
            "u_velocity": default_range,
            "v_velocity": default_range,
            "w_velocity": default_range,
            "p_angle": default_range,
            "q_angle": default_range,
            "r_angle": default_range,
            "fi_angle": default_range,
            "theta_angle": default_range,
            "si_angle": default_range,
            "xI": default_range,
            "yI": default_range,
            "zI": default_range,
            "a_flapping": default_range,
            "b_flapping": default_range,
            "c_flapping": default_range,
            "d_flapping": default_range,
        }
        self.low_obs_space = np.array(list(zip(*self.observation_space_domain.values()))[0], dtype=np.float32)
        self.high_obs_space = np.array(list(zip(*self.observation_space_domain.values()))[1], dtype=np.float32)
        self.observation_space = spaces.Box(low=self.low_obs_space, high=self.high_obs_space, dtype=np.float32)
        self.default_act_range = default_act_range = (-0.3, 0.3)

        self.action_space_domain = {
            "deltacol": default_act_range,
            "deltalat": default_act_range,
            "deltalon": default_act_range,
            "deltaped": default_act_range,
            # "f1": (0.1, 5), "f2": (0.5, 20), "f3": (0.5, 20), "f4": (0.5, 10),
            # "lambda1": (0.5, 10), "lambda2": (0.1, 5), "lambda3": (0.1, 5), "lambda4": (0.1, 5),
            # "eta1": (0.2, 5), "eta2": (0.1, 5), "eta3": (0.1, 5), "eta4": (0.1, 5),
        }
        self.low_action_space = np.array(list(zip(*self.action_space_domain.values()))[0], dtype=np.float32)
        self.high_action_space = np.array(list(zip(*self.action_space_domain.values()))[1], dtype=np.float32)
        self.action_space = spaces.Box(low=self.low_action_space, high=self.high_action_space, dtype=np.float32)
        self.min_reward = -13
        self.t_start, self.dt, self.t_end = 0, 0.03, 2
        self.no_timesteps = int((self.t_end - self.t_start) / self.dt)
        self.all_t = np.linspace(self.t_start, self.t_end, self.no_timesteps)
        self.counter = 0
        self.best_reward = -100_000_000
        self.longest_num_step = 0
        self.reward_check_time = 0.7 * self.t_end
        self.high_action_diff = 0.2
        obs_header = str(list(self.observation_space_domain.keys()))[1:-1]
        act_header = str(list(self.action_space_domain.keys()))[1:-1]
        self.header = "time, " + act_header + ", " + obs_header + ", reward" + ", control reward"
        self.saver = save_files()
        self.reward_array = np.array((0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), dtype=np.float32)

    def action_wrapper(self, current_action: np.ndarray) -> None:
        # current_action is expected to lie in [-1, 1] per component; it is clipped
        # to the action-space bounds and stored for the current timestep.
        current_action = np.array(current_action)
        # current_action = current_action * (self.high_action_space - self.low_action_space) / 2 \
        #  + (self.high_action_space + self.low_action_space) / 2
        self.all_actions[self.counter] = self.control_input = np.clip(
            current_action, self.low_action_space, self.high_action_space
        )

    def find_next_state(self) -> None:
        # Integrate the symbolic state equations one time step forward with RK45.
        current_t = self.dt * self.counter
        self.current_states = self.My_helicopter.RK45(
            current_t,
            self.current_states,
            self.symbolic_states_math,
            self.dt,
            self.control_input,
        )

    def observation_function(self) -> list:
        self.all_obs[self.counter] = observation = list(self.current_states)
        return observation

    def reward_function(self) -> float:
        # add reward slope to the reward
        # error: negative L1 norm of the first 12 (rigid-body) states
        error = -np.linalg.norm((abs(self.current_states[0:12]).reshape(12)), 1)
        self.control_rewards[self.counter] = error
        # per-state error normalized by the observation range
        for i in range(12):
            self.reward_array[i] = abs(self.current_states[i]) / self.default_range[1]
        reward = self.all_rewards[self.counter] = (
            -sum(self.reward_array) + 0.17 / self.default_range[1]
        )  # control reward
        reward += 0.1 * float(
            self.control_rewards[self.counter] - self.control_rewards[self.counter - 1]
        )  # control slope
        reward += -0.05 * sum(abs(self.all_actions[self.counter]))  # input reward
        # penalty on large changes between consecutive actions
        for i in (self.high_action_diff - self.all_actions[self.counter] - self.all_actions[self.counter - 1]) ** 2:
            reward += -min(0, i)
        return float(reward)

    def check_diverge(self) -> bool:
        if max(abs(self.current_states)) > self.default_range[1]:
            print("state_diverge")
            self.jj = 1
            return True
        if self.counter >= self.no_timesteps - 1:  # last timestep reached
            print("successful")
            return True
        # after self.reward_check_time, check whether the reward is still decreasing
        if self.counter > self.reward_check_time / self.dt:
            diverge_criteria = (
                self.all_rewards[self.counter] - self.all_rewards[int(self.counter - self.reward_check_time / self.dt)]
            )
            if diverge_criteria < -0.001:
                print("reward_diverge")
                self.jj = 1
                return True
        bool_1 = any(np.isnan(self.current_states))
        bool_2 = any(np.isinf(self.current_states))
        if bool_1 or bool_2:
            self.jj = 1
            print("state_inf_nan_diverge")
            return True
        return False

    def done_jobs(self) -> None:
        current_num_step = self.counter
        current_total_reward = sum(self.all_rewards)
        if current_num_step > 10:
            self.saver.reward_step_save(self.best_reward, self.longest_num_step, current_total_reward, current_num_step)
        if current_num_step >= self.longest_num_step:
            self.longest_num_step = current_num_step
        if current_total_reward > self.best_reward and sum(self.all_rewards) != 0:
            self.best_reward = sum(self.all_rewards)
            self.saver.best_reward_save(
                self.all_t, self.all_actions, self.all_obs, self.all_rewards, self.control_rewards, self.header
            )

    def step(self, current_action):
        self.action_wrapper(current_action)
        try:
            self.find_next_state()
        except (OverflowError, ValueError, IndexError):
            # integration failed; flag the episode as diverged
            self.jj = 1
        observation = self.observation_function()
        reward = self.reward_function()
        self.done = self.check_diverge()
        if self.jj == 1:
            observation = list((self.all_obs[self.counter]) - self.default_range[0])
            reward = self.min_reward
        if self.done:
            self.done_jobs()

        self.counter += 1
        return observation, reward, self.done, {}

    def reset(self):
        # initialization
        self.all_obs = np.zeros((self.no_timesteps, len(self.high_obs_space)))
        self.all_actions = np.zeros((self.no_timesteps, len(self.high_action_space)))
        self.all_rewards = np.zeros((self.no_timesteps, 1))
        self.control_rewards = np.zeros((self.no_timesteps, 1))
        self.control_input = np.array((0, 0, 0, 0), dtype=np.float32)
        self.jj = 0
        self.counter = 0
        # Yd, Ydotd, Ydotdotd, Y, Ydot = self.My_controller.Yposition(0, self.current_states)
        # alternative all-zero start: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.current_states = self.initial_states = [
            3.70e-04,  # 0u
            1.15e-02,  # 1v
            4.36e-04,  # 2w
            -5.08e-03,  # 3p
            2.04e-04,  # 4q
            2.66e-05,  # 5r
            -1.08e-01,  # 6fi
            1.01e-04,  # 7theta
            -1.03e-03,  # 8si
            -4.01e-05,  # 9x
            -5.26e-02,  # 10y
            -2.94e-04,  # 11z
            -4.36e-06,  # 12a
            -9.77e-07,  # 13b
            -5.66e-05,  # 14c
            7.81e-04,  # 15d
        ]
        self.all_obs[self.counter] = observation = self.current_states
        self.done = False
        return observation

    def make_constant(self, true_list):
        # Hold every state flagged in true_list at its initial value.
        for i in range(len(true_list)):
            if true_list[i] == 1:
                self.current_states[i] = self.initial_states[i]
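
A minimal rollout sketch (assumed usage, not part of the source) that drives the environment above with random actions sampled from its action space, following the classic four-tuple gym step API used by the class:

# Assumed usage example for the CustomEnv class above.
env = CustomEnv()
obs = env.reset()
done = False
episode_return = 0.0
while not done:
    action = env.action_space.sample()  # random action within (-0.3, 0.3) per channel
    obs, reward, done, info = env.step(action)
    episode_return += reward
print("episode return:", episode_return)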
Example #4
# import csv
import gym
from env.Helicopter import Helicopter
from env.controller import Controller
from finding_random_states_and_actions import state_finder
from simple_pid import PID
import numpy as np

my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100_000_000_000

# print(sl_action)
done = False
# [0.8,0,0] lateral

k_bin = np.linspace(-1, 1, num=10)
i_bin = np.linspace(-1, 1, num=5)

current_action = [0, 0, 0, 0]
sl_action = [1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]
# pid_lat = PID(3.2, 2, -3.5, setpoint=-1.08e-01)
# pid_lon = PID(0.2, 0, 0, setpoint=0)
# pid_col = PID(-2, -3, -0.3, setpoint=-1)
# pid_ped = PID(-1, -1, -0.5, setpoint=0)
for i in range(len(k_bin)):
    for j in range(len(i_bin)):
        kk = k_bin[i]