def evolutionary_network_variable_psi_transformable_state(simulation_settings, logger, save=False):
    print("evolutionary_network_variable_psi_transformable_state")
    # Weights are saved in weights_es_variable_psi_transformable_state.npy
    filepath = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//weights_es_variable_psi_transformable_state.npy'
    evo_strategy_parameters = {
        'population_size': 100,
        'action_size': 3,
        'noise_standard_deviation': 0.1,
        'number_of_generations': 200,
        'learning_rate': 0.00025,
        'state_size': 14,
        'max_num_actions': 250
    }
    # Create one environment per population member, plus one extra
    env = []
    for i in range(evo_strategy_parameters['population_size'] + 1):
        env.append(RocketLander(simulation_settings))
    # save=True trains and writes the weights file; save=False reloads previously saved weights
    if save:
        save_file = filepath
        load_file = None
    else:
        save_file = None
        load_file = filepath
    evolutionary_network_transformable_state(env, evo_strategy_parameters, logger, load_file, save_file)
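# --- Hypothetical usage sketch (not from the original project): wires the ES
# setup above to a plain logging.Logger and the settings-dict format used
# elsewhere in this document. The logger type and entry point are assumptions.
import logging

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    es_logger = logging.getLogger('evolution_strategy')
    es_settings = {'Side Engines': True,
                   'Clouds': True,
                   'Vectorized Nozzle': True,
                   'Starting Y-Pos Constant': 1,
                   'Initial Force': 'random'}
    # save=True trains and writes the weights file; save=False reloads it for evaluation
    evolutionary_network_variable_psi_transformable_state(es_settings, es_logger, save=True)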
from environments.rocketlander import RocketLander
from constants import LEFT_GROUND_CONTACT, RIGHT_GROUND_CONTACT
import numpy as np

if __name__ == "__main__":
    # Settings holds all the settings for the rocket lander environment.
    settings = {'Side Engines': True,
                'Clouds': True,
                'Vectorized Nozzle': True,
                'Starting Y-Pos Constant': 1,
                'Initial Force': 'random'}  # (6000, -10000)

    env = RocketLander(settings)
    s = env._reset()

    from control_and_ai.pid import PID_Benchmark

    # Initialize the PID algorithm
    pid = PID_Benchmark()

    left_or_right_barge_movement = np.random.randint(0, 2)
    epsilon = 0.05
    total_reward = 0
    episode_number = 5

    for episode in range(episode_number):
        while True:
            a = pid.pid_algorithm(s)  # assumed argument: the controller acts on the current state s
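            # --- Continuation sketch (assumed, not verbatim from the project):
            # step the environment with the PID action, accumulate the reward,
            # and end the inner loop when the episode terminates.
            s, r, done, info = env.step(a)
            total_reward += r
            env.refresh(render=True)
            if done:
                print('Episode', episode, 'total reward:', total_reward)
                total_reward = 0
                s = env._reset()
                break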
last_game_average = -1000
last_best_noisy_game = -1000
max_game_average = -1000
noisy_game_no_longer_valid = False

# Create testing environment
settings = {'Side Engines': True,
            'Clouds': True,
            'Vectorized Nozzle': True,
            'Starting Y-Pos Constant': 1,
            'Initial Force': 'random'}  # (6000, -10000)

env = RocketLander(settings)
env.refresh(render=True)
env.render(mode="human")
env.reset()

print("-- Observations", env.observation_space)
print("-- actionspace", env.action_space)

# Note: `tf` (TensorFlow 1.x), `RocketLander`, `num_env_variables` and
# `num_env_actions` are expected to be imported/defined earlier in the full script.
# Initialize training matrix with random states and actions
# dataX = np.random.random((5, num_env_variables + num_env_actions))
dataX = tf.placeholder("float", [None, num_env_variables + num_env_actions])

# Only one output for the total score / reward
# dataY = np.random.random((5, 1))
dataY = tf.placeholder("float", [None, 1])
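# --- Model sketch (assumed architecture, not taken from the project): a small
# TF1 feedforward network that maps the concatenated (state, action) vector in
# dataX to a single predicted total reward, trained against dataY.
hidden_1 = tf.layers.dense(dataX, 128, activation=tf.nn.relu)
hidden_2 = tf.layers.dense(hidden_1, 128, activation=tf.nn.relu)
predicted_reward = tf.layers.dense(hidden_2, 1, activation=None)

loss = tf.losses.mean_squared_error(labels=dataY, predictions=predicted_reward)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)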
# One exploration policy per action dimension
eps = []
eps.append(OUPolicy(0, 0.2, 0.4))
eps.append(OUPolicy(0, 0.2, 0.4))
eps.append(OUPolicy(0, 0.2, 0.4))

simulation_settings = {'Side Engines': True,
                       'Clouds': True,
                       'Vectorized Nozzle': True,
                       'Graph': False,
                       'Render': False,
                       'Starting Y-Pos Constant': 1,
                       'Initial Force': 'random',
                       'Rows': 1,
                       'Columns': 2,
                       'Episodes': 300}

env = RocketLander(simulation_settings)
# env = wrappers.Monitor(env, '/tmp/contlunarlander', force=True, write_upon_reset=True)

FLAGS.retrain = True  # Restore weights if False
FLAGS.test = False
FLAGS.num_episodes = 300

model_dir = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//DDPG//models_unlimited_episodes_full_normalized_normal_state'

agent = DDPG(
    action_bounds,
    eps,
    env.observation_space.shape[0],
    actor_learning_rate=0.0001,
    critic_learning_rate=0.001,
    retrain=FLAGS.retrain,
    log_dir=FLAGS.log_dir,
    model_dir=model_dir)  # closing argument assumed: pass the model directory defined above
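# --- Ornstein-Uhlenbeck noise sketch (hypothetical helper, not the project's
# OUPolicy): illustrates the temporally correlated exploration noise that
# OUPolicy(mean, theta, sigma) presumably provides for DDPG's deterministic actor.
import numpy as np

class OUNoiseSketch:
    def __init__(self, mean=0.0, theta=0.2, sigma=0.4, dt=1.0):
        self.mean, self.theta, self.sigma, self.dt = mean, theta, sigma, dt
        self.x = mean

    def sample(self):
        # dx = theta * (mean - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = (self.theta * (self.mean - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn())
        self.x += dx
        return self.x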