import gym

from env.controller import Controller
from env.Helicopter import Helicopter


class state_finder:
    def __init__(self):
        self.my_heli = Helicopter()
        self.my_contr = Controller()
        self.ENV_ID = "CustomEnv-v0"
        # ENV_ID = "CartPole-v0"
        self.env = gym.make(self.ENV_ID)
        self.env.current_states = self.env.reset()

    def get_action(self,
                   current_states,
                   sl_action=[1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]):
        # Ask the model-based controller for the action matching the current
        # state at the current simulation time (dt * step counter).
        action = self.my_contr.Controller_model(current_states,
                                                self.env.dt * self.env.counter,
                                                sl_action)
        return action
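Usage note (my addition, not part of the original example): state_finder wraps the registered CustomEnv-v0 environment, so get_action can drive an ordinary rollout loop. A minimal sketch, assuming the classic Gym reset/step API used elsewhere in these examples; the variable names are illustrative only:

finder = state_finder()
obs = finder.env.reset()
done = False
while not done:
    action = finder.get_action(obs)                    # model-based controller action
    obs, reward, done, info = finder.env.step(action)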
Example #3
import gym
import numpy as np
import sympy as sp
from gym import spaces

from env.controller import Controller
from env.Helicopter import Helicopter


class CustomEnv(gym.Env):  # class name assumed from the "CustomEnv-v0" id registered in these examples
    def __init__(self):
        self.U_input = [U1, U2, U3, U4] = sp.symbols("U1:5", real=True)
        self.x_state = [
            u_velocity,
            v_velocity,
            w_velocity,
            p_angle,
            q_angle,
            r_angle,
            fi_angle,
            theta_angle,
            si_angle,
            xI,
            yI,
            zI,
            a_flapping,
            b_flapping,
            c_flapping,
            d_flapping,
        ] = sp.symbols("x1:17", real=True)
        self.My_helicopter = Helicopter()
        self.My_controller = Controller()
        self.t = sp.symbols("t")
        self.symbolic_states_math, jacobian = self.My_helicopter.lambd_eq_maker(
            self.t, self.x_state, self.U_input
        )
        self.default_range = default_range = (-20, 20)
        self.observation_space_domain = {
            "u_velocity": default_range,
            "v_velocity": default_range,
            "w_velocity": default_range,
            "p_angle": default_range,
            "q_angle": default_range,
            "r_angle": default_range,
            "fi_angle": default_range,
            "theta_angle": default_range,
            "si_angle": default_range,
            "xI": default_range,
            "yI": default_range,
            "zI": default_range,
            "a_flapping": default_range,
            "b_flapping": default_range,
            "c_flapping": default_range,
            "d_flapping": default_range,
        }
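        # zip(*...) below transposes the (low, high) pairs: index [0] gathers
        # every lower bound and index [1] every upper bound, producing the
        # arrays that parameterize the observation-space Box.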
        self.low_obs_space = np.array(list(zip(*self.observation_space_domain.values()))[0], dtype=np.float32)
        self.high_obs_space = np.array(list(zip(*self.observation_space_domain.values()))[1], dtype=np.float32)
        self.observation_space = spaces.Box(low=self.low_obs_space, high=self.high_obs_space, dtype=np.float32)
        self.default_act_range = default_act_range = (-0.3, 0.3)

        self.action_space_domain = {
            "deltacol": default_act_range,
            "deltalat": default_act_range,
            "deltalon": default_act_range,
            "deltaped": default_act_range,
            # "f1": (0.1, 5), "f2": (0.5, 20), "f3": (0.5, 20), "f4": (0.5, 10),
            # "lambda1": (0.5, 10), "lambda2": (0.1, 5), "lambda3": (0.1, 5), "lambda4": (0.1, 5),
            # "eta1": (0.2, 5), "eta2": (0.1, 5), "eta3": (0.1, 5), "eta4": (0.1, 5),
        }
        self.low_action_space = np.array(list(zip(*self.action_space_domain.values()))[0], dtype=np.float32)
        self.high_action_space = np.array(list(zip(*self.action_space_domain.values()))[1], dtype=np.float32)
        self.action_space = spaces.Box(low=self.low_action_space, high=self.high_action_space, dtype=np.float32)
        self.min_reward = -13
        self.t_start, self.dt, self.t_end = 0, 0.03, 2
        self.no_timesteps = int((self.t_end - self.t_start) / self.dt)
        self.all_t = np.linspace(self.t_start, self.t_end, self.no_timesteps)
        self.counter = 0
        self.best_reward = -100_000_000
        self.longest_num_step = 0
        self.reward_check_time = 0.7 * self.t_end
        self.high_action_diff = 0.2
        obs_header = str(list(self.observation_space_domain.keys()))[1:-1]
        act_header = str(list(self.action_space_domain.keys()))[1:-1]
        self.header = "time, " + act_header + ", " + obs_header + ", reward" + ", control reward"
        self.saver = save_files()  # save_files: project-local helper; its import is not shown in this excerpt
        self.reward_array = np.zeros(12, dtype=np.float32)
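Side note (my addition, not in the original listing): with the spaces built this way, the bounds can be sanity-checked directly. The class name CustomEnv is the one assumed above for this __init__:

env = CustomEnv()
print(env.observation_space.shape)   # (16,) -- one entry per state variable
print(env.action_space.sample())     # 4 values drawn from (-0.3, 0.3)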
Example #4
# import csv
import gym
from env.Helicopter import Helicopter
from env.controller import Controller
from finding_random_states_and_actions import state_finder
from simple_pid import PID
import numpy as np

my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100_000_000_000

# print(sl_action)
done = False
# [0.8,0,0] lateral

k_bin = np.linspace(-1, 1, num=10)
i_bin = np.linspace(-1, 1, num=5)

current_action = [0, 0, 0, 0]
sl_action = [1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]
# pid_lat = PID(3.2, 2, -3.5, setpoint=-1.08e-01)
# pid_lon = PID(0.2, 0, 0, setpoint=0)
# pid_col = PID(-2, -3, -0.3, setpoint=-1)
# pid_ped = PID(-1, -1, -0.5, setpoint=0)
for i in range(len(k_bin)):
    for j in range(len(i_bin)):
        kk = k_bin[i]
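The loop above is cut off in the original after `kk = k_bin[i]`. A plausible continuation is sketched below, assuming (as the commented-out `PID(kk, ii, 0, ...)` line in Example #6 suggests) that the two bins sweep proportional and integral gains and that the environment exposes `best_reward`; the state index fed to the PID is an assumption:

for i in range(len(k_bin)):
    for j in range(len(i_bin)):
        kk = k_bin[i]
        ii = i_bin[j]
        pid_lon = PID(kk, ii, 0, setpoint=-1.08e-01)       # hypothetical gain sweep
        my_env.current_states = my_env.reset()
        done = False
        while not done:
            deltalon = pid_lon(my_env.current_states[4])   # q_angle assumed at index 4
            my_env.current_states, reward, done, _ = my_env.step([0, 0, deltalon, 0])
        if my_env.best_reward > current_best_rew:
            current_best_rew = my_env.best_reward
            print("best gains so far:", kk, ii)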
Example #5
import gym
import torch

from env.controller import Controller
from env.Helicopter import Helicopter
from finding_random_states_and_actions import state_finder

# ActorCritic, inputDim and outputDim come from earlier parts of the original
# project and are not shown in this excerpt.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # assumed; not shown in the original

epochs = 200000
hidden_size = 400
model = ActorCritic(inputDim, outputDim, hidden_size).to(device)

model.load_state_dict(torch.load("model.pt"))
model.eval()


def trch_ft_device(values, device):
    # Convert a Python sequence / ndarray to a FloatTensor on the target device.
    output = torch.FloatTensor(values).to(device)
    return output


my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100000000000
done = False
my_env.current_states = my_env.reset()
while not done:
    # Convert the current observation to a batched float tensor on the target device.
    state = trch_ft_device(my_env.current_states, device).unsqueeze(0)
    dist, value = model(state)

    current_action = dist.sample()
    my_env.current_states, reward, done, _ = my_env.step(list(current_action.cpu().numpy().squeeze()))
if my_env.best_reward > current_best_rew:
    current_best_rew = my_env.best_reward
    # print("updated", sl_action)
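A small variation worth noting (my suggestion, not in the original): for evaluation it is common to disable gradient tracking and, when the policy outputs a continuous distribution such as a Normal, act on its mean instead of a random sample:

with torch.no_grad():
    dist, value = model(state)
    current_action = dist.mean    # deterministic evaluation instead of dist.sample()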
Example #6
import gym
from env.Helicopter import Helicopter
from env.controller import Controller
from finding_random_states_and_actions import state_finder
from simple_pid import PID
import numpy as np

my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100_000_000_000

# print(sl_action)
done = False
# [0.8,0,0] lateral

k_bin = np.linspace(-1, 1, num=10)
i_bin = np.linspace(-1, 1, num=5)

current_action = [0, 0, 0, 0]
sl_action = [1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]
# pid_lon = PID(kk, ii, 0, setpoint=-1.08e-01)
pid_lon = PID(0.2, 0.2, 0.01, setpoint=1.01e-04)  # controls q, theta
pid_lat = PID(0.2, 0.1, 0.01, setpoint=-1.08e-01)  # controls p, fi

pid_col = PID(-2, -3, -0.3, setpoint=0)
pid_ped = PID(-1, -1, -0.5, setpoint=-1.03e-03)
my_env.current_states = my_env.reset()
done = False
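The original listing ends here. Below is a minimal sketch of the control loop this setup appears to lead into; the state indices wired to each PID, and feeding the PID outputs straight in as the four actuator actions, are my assumptions (state ordering taken from Example #3):

while not done:
    obs = my_env.current_states
    deltacol = pid_col(obs[11])   # zI (altitude channel), index assumed
    deltalat = pid_lat(obs[3])    # p_angle, index assumed
    deltalon = pid_lon(obs[4])    # q_angle, index assumed
    deltaped = pid_ped(obs[5])    # r_angle, index assumed
    current_action = [deltacol, deltalat, deltalon, deltaped]
    my_env.current_states, reward, done, _ = my_env.step(current_action)
if my_env.best_reward > current_best_rew:
    current_best_rew = my_env.best_reward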