def evaluate_MPC():
    """Evaluate the MPC controller on the standard evaluation scenarios.

    Returns:
        tuple: (file_path, reward_results, final_state_history, action_history)
        where file_path is the directory the caller should save results to.
    """
    # Portable results directory (was a hard-coded absolute Windows path);
    # follows the CWD-based convention used by the other evaluate_* helpers.
    # NOTE(review): confirm CWD points at the intended project root.
    file_path = CWD + os.path.sep + 'evaluation_scripts' + os.path.sep + 'mpc'
    # Q penalizes state error, R penalizes control effort; k and p are
    # overall scaling factors on the two cost terms.
    k, p = 100, 10
    Q_weights = np.array([0.25, 4, 0.25, 0.25, 365, 131])
    Q = np.eye(6) * Q_weights * k
    R_weights = np.array([0.1, 0.1, 10])
    R = np.eye(3) * R_weights * p
    mpc_settings = {'time_horizon': 30,
                    'control_horizon': 5,
                    'time_step': 1 / 30,
                    'time_span': 30,
                    'flight_time_span': 50,
                    'ground_altitude': 5.2,
                    'max_altitude': 26,
                    'x_target': 16.5,
                    'finite_differences_step_size': 50,
                    'Optimisation Type': 1,
                    'Q': Q,
                    'R': R}
    # MPC works on the full 6-dimensional state.
    simulation_settings['Observation Space Size'] = 6
    env = RocketLander(simulation_settings)
    controller = MPC_Controller(env, mpc_settings=mpc_settings)
    testing_framework = Evaluation_Framework(simulation_settings, mpc_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(
        env, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, reward_results, final_state_history, action_history
def evaluate_pid():
    """Evaluate the PID controller on the standard evaluation scenarios.

    NOTE(review): a later definition with the same name shadows this one at
    module load time — confirm which version is intended and delete the
    duplicate.

    Returns:
        tuple: (file_path, reward_results, final_state_history, action_history)
    """
    # Portable results directory (was a hard-coded absolute Windows path),
    # matching the later CWD-based version of this function.
    file_path = CWD + os.path.sep + 'evaluation_scripts' + os.path.sep + 'pid'
    env = RocketLander(simulation_settings)
    pid_controller = PID_Controller()
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(
        env, pid_controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, reward_results, final_state_history, action_history
def evaluate_pid():
    """Run the evaluation suite with a PID controller and collect results."""
    sep = os.path.sep
    # sep.join produces exactly CWD + sep + 'evaluation_scripts' + sep + 'pid'
    file_path = sep.join([CWD, 'evaluation_scripts', 'pid'])
    environment = RocketLander(simulation_settings)
    controller = PID_Controller()
    framework = Evaluation_Framework(simulation_settings)
    rewards, final_states, actions = framework.execute_evaluation(
        environment, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, rewards, final_states, actions
def evaluate_q_learning(model_settings):
    """Evaluate a Q-learning model described by *model_settings*.

    model_settings['dir'] names both the trained-model subdirectory to load
    from and the results subdirectory to report back.
    """
    sep = os.path.sep
    model_dir = model_settings['dir']
    load_path = sep.join([CWD, 'control_and_ai', 'rl_q_learning', model_dir])
    file_path = sep.join([CWD, 'evaluation_scripts', 'rl_q_learning', model_dir])
    environment = RocketLander(simulation_settings)
    controller = Q_Learning_Controller(load_path=load_path,
                                       low_discretization=False,
                                       simulation_settings=simulation_settings)
    framework = Evaluation_Framework(simulation_settings)
    rewards, final_states, actions = framework.execute_evaluation(
        environment, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, rewards, final_states, actions
def evaluate_high_discretization_q_learning_function_approximation_longer_state():
    """Evaluate the high-discretization linear-function-approximation
    Q-learning controller that uses the longer state representation.

    Returns:
        tuple: (file_path, reward_results, final_state_history, action_history)
    """
    # Portable paths (were hard-coded absolute Windows paths); file_path
    # follows the CWD-based convention of the other evaluate_* helpers and
    # load_path follows evaluate_q_learning's convention.
    # NOTE(review): confirm CWD points at the intended project root.
    sep = os.path.sep
    file_path = sep.join([CWD, 'evaluation_scripts', 'rl_q_learning',
                          'high_discretization_longer_state'])
    load_path = sep.join(
        [CWD, 'control_and_ai', 'function_approximation_rl',
         'rl_linear_function_approximation_high_function_discretization_5000_episodes_trained_at_once.p'])
    env = RocketLander(simulation_settings)
    controller = Q_Learning_Controller_Longer_State(load_path=load_path,
                                                    low_discretization=False,
                                                    simulation_settings=simulation_settings)
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(
        env, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, reward_results, final_state_history, action_history
def evaluate_unnormalized_longer_state_ddpg():
    """Evaluate the DDPG model trained on the longer, unnormalized state.

    Returns:
        tuple: (file_path, reward_results, final_state_history, action_history)
    """
    # Portable paths (were hard-coded absolute Windows paths), following the
    # CWD-based convention of the other evaluate_* helpers.
    # NOTE(review): confirm CWD points at the intended project root.
    sep = os.path.sep
    load_path = sep.join([CWD, 'control_and_ai', 'DDPG',
                          'model_2_longer_unnormalized_state'])
    file_path = sep.join([CWD, 'evaluation_scripts', 'ddpg',
                          'model_2_unnormalized_longer_state'])
    normal_state_FLAG = False
    untransformed_state_FLAG = False
    normalized_FLAG = False
    # The longer state representation is 16-dimensional.
    simulation_settings['Observation Space Size'] = 16
    env = RocketLander(simulation_settings)
    controller = Unnormalized_DDPG_Controller_Longer_State(
        env=env,
        load_path=load_path,
        normal_state_FLAG=normal_state_FLAG,
        untransformed_state_FLAG=untransformed_state_FLAG,
        normalized_FLAG=normalized_FLAG)
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(
        env, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, reward_results, final_state_history, action_history
def evaluate_ddpg(model_settings):
    """Evaluate a DDPG model described by *model_settings*.

    Expected keys: 'dir' (model load directory, also the results
    subdirectory name), 'short', 'untransformed', 'normalized' (controller
    flags), and 'obs_size' (observation space dimensionality).
    """
    sep = os.path.sep
    load_path = model_settings['dir']
    file_path = sep.join([CWD, 'evaluation_scripts', 'ddpg', model_settings['dir']])
    simulation_settings['Observation Space Size'] = model_settings['obs_size']
    environment = RocketLander(simulation_settings)
    controller = DDPG_Controller(env=environment,
                                 load_path=load_path,
                                 short_state_FLAG=model_settings['short'],
                                 untransformed_state_FLAG=model_settings['untransformed'],
                                 normalized_FLAG=model_settings['normalized'])
    framework = Evaluation_Framework(simulation_settings)
    rewards, final_states, actions = framework.execute_evaluation(
        environment, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, rewards, final_states, actions
def evaluate_normalized_normal_state_ddpg():
    """Evaluate DDPG Model 1 (normalized, normal 8-dim state).

    Training configuration (from the original author's note): Fuel Cost = 0,
    Max Steps = 500, Episode Training = 2000, RANDOM FORCE = 20000,
    RANDOM X_FORCE = 0.2 * RANDOM FORCE.

    Returns:
        tuple: (file_path, reward_results, final_state_history, action_history)
    """
    # Portable paths (were hard-coded absolute Windows paths).
    # Fix: the original set file_path to the *model* directory (same as
    # load_path), unlike every other evaluate_* helper whose file_path is an
    # evaluation_scripts results directory — presumably a copy-paste slip.
    # NOTE(review): confirm CWD points at the intended project root.
    sep = os.path.sep
    load_path = sep.join([CWD, 'control_and_ai', 'DDPG', 'model_normal_state'])
    file_path = sep.join([CWD, 'evaluation_scripts', 'ddpg', 'model_normal_state'])
    normal_state_FLAG = True
    untransformed_state_FLAG = False
    normalized_FLAG = True
    # The normal state representation is 8-dimensional.
    simulation_settings['Observation Space Size'] = 8
    env = RocketLander(simulation_settings)
    controller = Normalized_DDPG_Controller(
        env=env,
        load_path=load_path,
        normal_state_FLAG=normal_state_FLAG,
        untransformed_state_FLAG=untransformed_state_FLAG,
        normalized_FLAG=normalized_FLAG)
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(
        env, controller, INITIAL_STATES, INITIAL_FORCES, DISTURBANCES, IMPULSES)
    return file_path, reward_results, final_state_history, action_history
eps = [] eps.append(OUPolicy(0, 0.2, 0.4)) eps.append(OUPolicy(0, 0.2, 0.4)) eps.append(OUPolicy(0, 0.2, 0.4)) simulation_settings = {'Side Engines': True, 'Clouds': True, 'Vectorized Nozzle': True, 'Graph': False, 'Render': False, 'Starting Y-Pos Constant': 1, 'Initial Force': 'random', 'Rows': 1, 'Columns': 2, 'Episodes': 500} env = RocketLander(simulation_settings) #env = wrappers.Monitor(env, '/tmp/contlunarlander', force=True, write_upon_reset=True) FLAGS.retrain = False # Restore weights if False FLAGS.test = False FLAGS.num_episodes = 500 model_dir = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//DDPG//model_2_longer_unnormalized_state' with tf.device('/cpu:0'): agent = DDPG( action_bounds, eps, env.observation_space.shape[0], actor_learning_rate=0.0001, critic_learning_rate=0.001, retrain=FLAGS.retrain, log_dir=FLAGS.log_dir,