for episode in range(episode_number):
    while True:
        # Pass the state to the controller, get the action back.
        a = pid.pid_algorithm(s)

        # Step through the simulation (1 step). Refer to Simulation Update in constants.py.
        s, r, done, info = env._step(a)
        total_reward += r  # Accumulate reward
        # -------------------------------------
        # Optional render
        env._render()
        # Draw the target
        env.draw_marker(env.landing_coordinates[0], env.landing_coordinates[1])
        # Refresh render
        env.refresh(render=False)

        # While neither leg is touching the ground, move the barge. Water
        # movement, dynamics etc. can be simulated here.
        if s[LEFT_GROUND_CONTACT] == 0 and s[RIGHT_GROUND_CONTACT] == 0:
            env.move_barge_randomly(epsilon, left_or_right_barge_movement)
            # Random force on the rocket to simulate wind.
            env.apply_random_x_disturbance(epsilon=0.005, left_or_right=left_or_right_barge_movement)
            env.apply_random_y_disturbance(epsilon=0.005)

        # Touchdown, or theta passed abs(THETA_LIMIT)
        if done:
            print('Episode:\t{}\tTotal Reward:\t{}'.format(episode, total_reward))
            total_reward = 0
            s = env._reset()  # assign the fresh state for the next episode
            break  # without this, the inner loop never advances to the next episode
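# --- A minimal sketch of what `pid.pid_algorithm(s)` might look like. ---
# The PID object is not defined in this snippet, so everything below is an
# assumption: a textbook PID controller with illustrative gains, not the
# author's actual implementation. In practice one such controller would be
# run per regulated quantity (e.g. altitude, lateral drift, nozzle angle)
# and the outputs combined into the action vector `a`.
class PIDController:
    def __init__(self, kp, ki, kd):
        # Hypothetical gains; they would need tuning for the real dynamics.
        self.kp, self.ki, self.kd = kp, ki, kd
        self.integral = 0.0
        self.previous_error = 0.0

    def compute(self, error, dt=1.0 / 60.0):
        # I term: accumulate the error over time; D term: estimate its rate.
        self.integral += error * dt
        derivative = (error - self.previous_error) / dt
        self.previous_error = error
        return self.kp * error + self.ki * self.integral + self.kd * derivative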
last_best_noisy_game = -1000
max_game_average = -1000
noisy_game_no_longer_valid = False

# Create the testing environment
settings = {'Side Engines': True,
            'Clouds': True,
            'Vectorized Nozzle': True,
            'Starting Y-Pos Constant': 1,
            'Initial Force': 'random'}  # or a fixed force, e.g. (6000, -10000)

env = RocketLander(settings)
env.refresh(render=True)
env.render(mode="human")
env.reset()

print("-- Observations", env.observation_space)
print("-- actionspace", env.action_space)

# initialize training matrix with random states and actions
# dataX = np.random.random((5, num_env_variables + num_env_actions))
dataX = tf.placeholder("float", [None, num_env_variables + num_env_actions])

# Only one output for the total score / reward
# dataY = np.random.random((5, 1))
dataY = tf.placeholder("float", [None, 1])
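# --- A minimal sketch of how the dataX/dataY placeholders above could be
# used. The network itself is not part of this snippet, so the layer sizes,
# tf.layers calls and Adam optimizer below are assumptions: a small TF1-style
# regressor that predicts total reward (dataY) from a concatenated
# state+action vector (dataX), not the author's actual model. ---
hidden = tf.layers.dense(dataX, 128, activation=tf.nn.relu)
prediction = tf.layers.dense(hidden, 1)  # single scalar: predicted reward

loss = tf.losses.mean_squared_error(labels=dataY, predictions=prediction)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

# Assumed usage: feed batches of (state, action) rows and observed rewards.
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     sess.run(train_op, feed_dict={dataX: batch_x, dataY: batch_y})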