Example #1
def evaluate_MPC():
    file_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//evaluation_scripts//mpc'

    k, p = 100, 10  # scalar gains applied to the state and control cost matrices below

    # Diagonal state-cost matrix: broadcasting np.eye(6) against the weight
    # vector is equivalent to np.diag(Q_weights) * k.
    Q_weights = np.array([0.25, 4, 0.25, 0.25, 365, 131])
    Q = np.eye(6) * Q_weights * k

    # Diagonal control-cost matrix.
    R_weights = np.array([0.1, 0.1, 10])
    R = np.eye(3) * R_weights * p

    mpc_settings = {'time_horizon': 30,
                    'control_horizon': 5,
                    'time_step': 1 / 30,
                    'time_span': 30,
                    'flight_time_span': 50,
                    'ground_altitude': 5.2,
                    'max_altitude': 26,
                    'x_target': 16.5,
                    'finite_differences_step_size': 50,
                    'Optimisation Type': 1,
                    'Q': Q,
                    'R': R}

    simulation_settings['Observation Space Size'] = 6
    env = RocketLander(simulation_settings)
    controller = MPC_Controller(env, mpc_settings=mpc_settings)

    testing_framework = Evaluation_Framework(simulation_settings, mpc_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
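
For reference, broadcasting np.eye(6) against the weight vector yields a plain diagonal matrix, so the Q construction above is equivalent to np.diag(Q_weights) * k. A minimal standalone check:

import numpy as np

Q_weights = np.array([0.25, 4, 0.25, 0.25, 365, 131])
k = 100
# Element (i, j) of np.eye(6) * Q_weights is eye[i, j] * Q_weights[j],
# so only the diagonal survives the broadcast.
assert np.allclose(np.eye(6) * Q_weights * k, np.diag(Q_weights) * k)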
Example #2
def evaluate_pid():
    file_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//evaluation_scripts//pid'

    env = RocketLander(simulation_settings)
    pid_controller = PID_Controller()
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, pid_controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
Example #3
def evaluate_pid():
    file_path = CWD + os.path.sep + 'evaluation_scripts' + os.path.sep + 'pid'

    env = RocketLander(simulation_settings)
    pid_controller = PID_Controller()
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, pid_controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
Example #4
def evaluate_q_learning(model_settings):
    load_path = CWD + os.path.sep + 'control_and_ai' + os.path.sep + 'rl_q_learning' + os.path.sep + model_settings['dir']

    file_path = CWD + os.path.sep + 'evaluation_scripts' + os.path.sep + 'rl_q_learning' + os.path.sep + model_settings['dir']

    env = RocketLander(simulation_settings)
    controller = Q_Learning_Controller(load_path=load_path, low_discretization=False,
                                       simulation_settings=simulation_settings)
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
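
The CWD-based variants above build paths by concatenating os.path.sep by hand; os.path.join expresses the same thing more idiomatically. A minimal sketch, assuming CWD is the project root as in these examples and using a hypothetical directory name:

import os

CWD = os.getcwd()                    # project root (assumption)
model_settings = {'dir': 'model_1'}  # hypothetical directory name

load_path = os.path.join(CWD, 'control_and_ai', 'rl_q_learning', model_settings['dir'])
file_path = os.path.join(CWD, 'evaluation_scripts', 'rl_q_learning', model_settings['dir'])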
Example #5
def evaluate_high_discretization_q_learning_function_approximation_longer_state():
    file_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//evaluation_scripts//rl_q_learning//high_discretization_longer_state'

    load_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//function_approximation_rl' \
                '//rl_linear_function_approximation_high_function_discretization_5000_episodes_trained_at_once.p'

    env = RocketLander(simulation_settings)
    controller = Q_Learning_Controller_Longer_State(load_path=load_path, low_discretization=False,
                                                    simulation_settings=simulation_settings)
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
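
The .p extension on load_path suggests the trained function-approximation weights are pickled. Presumably the controller restores them roughly like this (a sketch, not the repository's actual loader):

import pickle

# Hypothetical restore of a pickled weight file.
load_path = 'rl_linear_function_approximation_high_function_discretization_5000_episodes_trained_at_once.p'
with open(load_path, 'rb') as f:
    weights = pickle.load(f)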
Example #6
def evaluate_unnormalized_longer_state_ddpg():
    load_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//DDPG//model_2_longer_unnormalized_state'

    file_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//evaluation_scripts//ddpg//model_2_unnormalized_longer_state'

    normal_state_FLAG = False
    untransformed_state_FLAG = False
    normalized_FLAG = False

    simulation_settings['Observation Space Size'] = 16
    env = RocketLander(simulation_settings)
    controller = Unnormalized_DDPG_Controller_Longer_State(env=env, load_path=load_path,
                                                           normal_state_FLAG=normal_state_FLAG,
                                                           untransformed_state_FLAG=untransformed_state_FLAG,
                                                           normalized_FLAG=normalized_FLAG)  # , simulation_settings=simulation_settings)

    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
Example #7
def evaluate_ddpg(model_settings):
    load_path = model_settings['dir']

    file_path = CWD + os.path.sep + 'evaluation_scripts' + os.path.sep + 'ddpg' + os.path.sep + model_settings['dir']

    short_state_FLAG = model_settings['short']
    untransformed_state_FLAG = model_settings['untransformed']
    normalized_FLAG = model_settings['normalized']

    simulation_settings['Observation Space Size'] = model_settings['obs_size']
    env = RocketLander(simulation_settings)
    controller = DDPG_Controller(env=env, load_path=load_path, short_state_FLAG=short_state_FLAG,
                                 untransformed_state_FLAG=untransformed_state_FLAG,
                                 normalized_FLAG=normalized_FLAG)  # , simulation_settings=simulation_settings)
    
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
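
evaluate_ddpg is driven entirely by its model_settings dict; the keys it reads are exactly those shown above. A hypothetical invocation (the directory name and observation-space size are illustrative, not taken from the repository):

model_settings = {'dir': 'model_1',    # hypothetical checkpoint directory
                  'short': True,
                  'untransformed': False,
                  'normalized': True,
                  'obs_size': 8}       # hypothetical observation-space size

file_path, rewards, states, actions = evaluate_ddpg(model_settings)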
Example #8
def evaluate_normalized_normal_state_ddpg():
    # Model 1
    # Fuel Cost = 0, Max Steps = 500, Episode Training = 2000, RANDOM FORCE = 20000, RANDOM X_FORCE = 0.2*RANDOM FORCE
    load_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//DDPG//model_normal_state'
    
    file_path = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//control_and_ai//DDPG//model_normal_state'

    normal_state_FLAG = True
    untransformed_state_FLAG = False
    normalized_FLAG = True

    simulation_settings['Observation Space Size'] = 8
    env = RocketLander(simulation_settings)
    controller = Normalized_DDPG_Controller(env=env, load_path=load_path, normal_state_FLAG=normal_state_FLAG,
                                            untransformed_state_FLAG=untransformed_state_FLAG,
                                            normalized_FLAG=normalized_FLAG)  # , simulation_settings=simulation_settings)
    
    testing_framework = Evaluation_Framework(simulation_settings)
    reward_results, final_state_history, action_history = testing_framework.execute_evaluation(env, controller,
                                                                                               INITIAL_STATES,
                                                                                               INITIAL_FORCES,
                                                                                               DISTURBANCES, IMPULSES)

    return file_path, reward_results, final_state_history, action_history
Example #9
# One Ornstein-Uhlenbeck exploration policy per continuous action dimension.
eps = []
eps.append(OUPolicy(0, 0.2, 0.4))
eps.append(OUPolicy(0, 0.2, 0.4))
eps.append(OUPolicy(0, 0.2, 0.4))

simulation_settings = {'Side Engines': True,
                       'Clouds': True,
                       'Vectorized Nozzle': True,
                       'Graph': False,
                       'Render': False,
                       'Starting Y-Pos Constant': 1,
                       'Initial Force': 'random',
                       'Rows': 1,
                       'Columns': 2,
                       'Episodes': 500}
env = RocketLander(simulation_settings)
#env = wrappers.Monitor(env, '/tmp/contlunarlander', force=True, write_upon_reset=True)

FLAGS.retrain = False # Restore weights if False
FLAGS.test = False
FLAGS.num_episodes = 500
model_dir = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//DDPG//model_2_longer_unnormalized_state'
with tf.device('/cpu:0'):
    agent = DDPG(
        action_bounds,
        eps,
        env.observation_space.shape[0],
        actor_learning_rate=0.0001,
        critic_learning_rate=0.001,
        retrain=FLAGS.retrain,
        log_dir=FLAGS.log_dir,
        model_dir=model_dir)
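
OUPolicy(0, 0.2, 0.4) above presumably parameterizes Ornstein-Uhlenbeck exploration noise for each continuous action; the exact argument order (mu, sigma, theta) is an assumption. A minimal reference implementation of the OU update, for orientation only:

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: x += theta * (mu - x) + sigma * N(0, 1)."""
    def __init__(self, mu=0.0, sigma=0.2, theta=0.4):
        self.mu, self.sigma, self.theta = mu, sigma, theta
        self.x = mu

    def sample(self):
        # Mean-reverting step plus Gaussian noise gives temporally correlated exploration.
        self.x += self.theta * (self.mu - self.x) + self.sigma * np.random.randn()
        return self.x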