import time

import gym
import numpy as np

# `Agent` is this project's DQN agent class; the import path is assumed.
from agent import Agent


def main():
    gym_env = gym.make('custom_gym:Xplane-v0')
    lr = 0.001
    gam = 0.01
    n_games = 1
    # nn_input = obs()
    agent = Agent(learning_rate=lr, gamma=gam, epsilon=1.0, input_dims=(6, ),
                  n_actions=15, batch_size=32,
                  file_name='AI_takeoff/saved_models/dq_model_2.h5')
    scores = []
    total_steps = []
    eps_hist = []

    agent.load_model()

    for i in range(n_games):
        try:
            done = False
            score = 0
            observation = gym_env.reset()
            time.sleep(2)
            observation_checkpoints = np.array([observation[0:2]])
            step_counter = 0
            print("GAME ITERATION ", i)
            while not done:
                action = agent.choose_action(observation)
                # The custom X-Plane env returns a 3-tuple (no info dict).
                new_observation, reward, done = gym_env.step(action)
                step_counter += 1
                score += reward
                agent.store_transition(observation, action, reward,
                                       new_observation, done)
                observation = new_observation
                # agent.learn()

                # Stuck check: if the airplane's (x, y) position has not
                # changed over the last 30 steps, end the episode.
                observation_checkpoints = np.append(
                    observation_checkpoints, [new_observation[0:2]], axis=0)
                print(observation_checkpoints)
                print("stepcounter is", step_counter)
                if step_counter % 30 == 0:
                    if np.array_equal(
                            observation_checkpoints[step_counter - 30],
                            observation_checkpoints[step_counter - 1]):
                        done = True

            eps_hist.append(agent.epsilon)
            scores.append(score)
            total_steps.append(step_counter)
        except Exception as e:
            print(str(e))
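# --- Hedged sketch: epsilon-greedy action selection --------------------------
# The script above calls agent.choose_action(), but the Agent class itself is
# not shown. Below is a minimal, self-contained sketch of how that method is
# commonly implemented for a DQN agent. The class and attribute names
# (_AgentSketch, self.q_net, self.n_actions, self.epsilon) are assumptions,
# not this project's actual API.

import numpy as np


class _AgentSketch:
    def __init__(self, q_net, n_actions, epsilon):
        self.q_net = q_net          # Keras model mapping state -> Q-values
        self.n_actions = n_actions
        self.epsilon = epsilon

    def choose_action(self, observation):
        # With probability epsilon, explore with a random action;
        # otherwise act greedily on the predicted Q-values.
        if np.random.random() < self.epsilon:
            return np.random.randint(self.n_actions)
        q_values = self.q_net.predict(observation[np.newaxis, :], verbose=0)
        return int(np.argmax(q_values))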
import gym
import numpy as np

# `Agent` and `create_gif` are project-local helpers; import paths are assumed.
from agent import Agent
from utils import create_gif


def main(env_name=None):
    ENV_NAME = 'wumpus-v0'
    if env_name:
        ENV_NAME = env_name
    MODEL_DIR = f'models/{ENV_NAME}-dqn'
    MODEL_FILE = f'{ENV_NAME}-dqn.h5'
    CHECKPOINTS_DIR = f'models/{ENV_NAME}-dqn/checkpoints'
    TEST_IMG_DIR = f'tests/{ENV_NAME}-dqn'

    env = gym.make(ENV_NAME)
    env.reset()

    # Epsilon is pinned to 0 so the agent acts greedily during evaluation.
    agent = Agent(learning_rate=0.01, gamma=0.95,
                  state_shape=env.observation_space.shape, actions=7,
                  batch_size=64, epsilon_initial=0.0, epsilon_decay=0,
                  epsilon_final=0.0, replay_buffer_capacity=1000000,
                  model_name=MODEL_FILE, model_dir=MODEL_DIR,
                  ckpt_dir=CHECKPOINTS_DIR)
    agent.load_model()

    done = False
    score = 0
    steps_per_episode = 0
    state = env.reset()
    images = [env.render('rgb_array')]

    while not done:
        # Choose action according to the (greedy) policy, and execute it
        action = agent.select_action(state)
        state, reward, done, _ = env.step(action)
        score += reward
        steps_per_episode += 1
        images.append(env.render('rgb_array'))

    # Generate a GIF of the episode
    create_gif(f'{ENV_NAME}.gif', np.array(images), fps=1.0)

    print(f"Model '{ENV_NAME}', score {score}, steps {steps_per_episode}")
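# --- Hedged sketch: create_gif helper -----------------------------------------
# create_gif() is a project-local helper that is not shown here. A minimal
# sketch using the imageio library (an assumption; the real helper may differ):

import imageio
import numpy as np


def create_gif(filename, images, fps=1.0):
    """Write a stack of RGB frames with shape (N, H, W, 3) to an animated GIF."""
    # duration is the display time of each frame in seconds
    imageio.mimsave(filename, [np.asarray(frame) for frame in images],
                    duration=1.0 / fps)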
# The fragment below begins mid-call; the `agent_2 = Agent(` opening is
# reconstructed from the surrounding context (fname=file_2, agent_2.load_model).
agent_2 = Agent(epsilon=0, epsilon_dec=0.9996, epsilon_min=0.01,
                input_shape=input_shape, h1_dims=h1_dims,
                action_space=action_space, fname=file_2)

if train_networks:
    memory_2 = training_tools.ReplayBuffer(mem_size, input_shape,
                                           len(action_space))

# Load networks if specified
if load_networks:
    if p1_type == 'Agent':
        agent_1.load_model(file_1)
    if p2_type == 'Agent':
        agent_2.load_model(file_2)
    print('\n... Models Loaded ...\n')

# Initialize Pong environment
screen_Size = (1000, 600)
env = pongGame(screen_Size, p1_type, p2_type, action_space)
if show_game:
    framerate = 60
    env.setupWindow(framerate)
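# --- Hedged sketch: ReplayBuffer -----------------------------------------------
# training_tools.ReplayBuffer is not shown above. A common numpy-based
# implementation matching the (mem_size, input_shape, n_actions) constructor
# looks roughly like this; the method names and the one-hot action storage
# are assumptions, not necessarily this project's implementation:

import numpy as np


class ReplayBuffer:
    def __init__(self, mem_size, input_shape, n_actions):
        self.mem_size = mem_size
        self.mem_counter = 0
        self.states = np.zeros((mem_size, *input_shape), dtype=np.float32)
        self.new_states = np.zeros((mem_size, *input_shape), dtype=np.float32)
        self.actions = np.zeros((mem_size, n_actions), dtype=np.int8)
        self.rewards = np.zeros(mem_size, dtype=np.float32)
        self.dones = np.zeros(mem_size, dtype=np.bool_)

    def store(self, state, action, reward, new_state, done):
        # Overwrite the oldest entry once the buffer is full (ring buffer).
        idx = self.mem_counter % self.mem_size
        self.states[idx] = state
        self.new_states[idx] = new_state
        self.actions[idx] = action
        self.rewards[idx] = reward
        self.dones[idx] = done
        self.mem_counter += 1

    def sample(self, batch_size):
        # Sample uniformly from the filled portion of the buffer.
        max_mem = min(self.mem_counter, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)
        return (self.states[batch], self.actions[batch], self.rewards[batch],
                self.new_states[batch], self.dones[batch])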