def test_against_environment(env_name, num_runs, agent_name):
    env = gym.make(env_name)
    # env.seed(0)
    try:
        predictor = load_predictive_model(env_name, env.action_space.n)
        if agent_name == 'Next_agent':
            agent = StateAgent(env.action_space.n, env_name)
            agent.set_weights()
        elif agent_name == 'DQN':
            agent = Agent(gamma=0.99, epsilon=0.00, alpha=0.0001,
                          input_dims=(104, 80, 4),
                          n_actions=env.action_space.n, mem_size=25000,
                          eps_min=0.00, batch_size=32, replace=1000,
                          eps_dec=1e-5, env_name=env_name)
            agent.load_models()
    except:
        print("Error loading model, check environment name and action space dimensions")
    rewards = []
    start = time.time()
    total_steps = 0.0
    for i in range(num_runs):
        frame_queue = deque(maxlen=4)
        observation = env.reset()
        done = False
        if agent_name == 'DQN':
            init_queue(frame_queue, observation, True)
        else:
            init_queue(frame_queue, observation)
        total_reward = 0.0
        frame_count = 0
        while not done:
            observation_states = np.concatenate(frame_queue, axis=2)
            # Manual start for Breakout, since the next-state agent otherwise just keeps moving to the left
            if agent_name == 'Next_agent':
                if env_name == 'BreakoutDeterministic-v4' and not frame_count:
                    agent_action = 1
                else:
                    next_states = predictor.generate_output_states(
                        np.expand_dims(observation_states, axis=0))
                    agent_action = agent.choose_action_from_next_states(
                        np.expand_dims(next_states, axis=0))
            elif agent_name == 'DQN':
                agent_action = agent.choose_action(observation_states)
            else:
                agent_action = env.action_space.sample()
            observation, reward, done, _ = env.step(agent_action)
            total_reward += reward
            frame_count += 1
            total_steps += 1
            frame_queue.pop()
            if agent_name == 'DQN':
                frame_queue.appendleft(preprocess_frame_dqn(observation))
            else:
                frame_queue.appendleft(preprocess_frame(observation))
        print("Completed episode {} with reward {}".format(i + 1, total_reward))
        rewards.append(total_reward)
    end = time.time()
    # Average wall-clock seconds per environment step
    time_taken = (end - start) / total_steps
    print("Test complete - Average score: {} Max score: {}".format(
        np.average(rewards), np.max(rewards)))
    return (rewards, time_taken)
def init_queue(queue, observation, dqn=False):
    # Fill the frame queue with four copies of the initial observation so a
    # full stacked state is available from the first step.
    for _ in range(4):
        if dqn:
            queue.append(preprocess_frame_dqn(observation))
        else:
            queue.append(preprocess_frame(observation))
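# Hedged usage sketch for the test harness above; the environment id and run
# count are illustrative only. agent_name selects the next-state agent
# ('Next_agent'), the DQN ('DQN'), or a uniformly random policy (any other value).
if __name__ == '__main__':
    episode_rewards, secs_per_step = test_against_environment(
        'BreakoutDeterministic-v4', num_runs=10, agent_name='DQN')
    print('Mean reward {:.1f}, {:.4f}s per step'.format(
        np.mean(episode_rewards), secs_per_step))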
def main(args):
    env_name = args.env_name
    env = gym.make(env_name)
    env.seed(0)
    num_games = 5
    load_checkpoint = True
    best_score = 0
    agent = Agent(gamma=0.99, epsilon=0.0, alpha=0.0001,
                  input_dims=(104, 80, 4), n_actions=env.action_space.n,
                  mem_size=25000, eps_min=0.02, batch_size=32, replace=1000,
                  eps_dec=1e-5, env_name=env_name)
    try:
        agent.load_models()
    except:
        print('No DQN models found for %s in models folder' % env_name)
        raise
    scores, eps_history = [], []
    n_steps = 0
    for i in range(num_games):
        done = False
        observation = env.reset()
        frame_queue = deque(maxlen=4)
        observation = preprocess_frame_dqn(observation)
        for j in range(4):
            frame_queue.append(observation)
        observation = np.concatenate(frame_queue, axis=2)
        score = 0
        while not done:
            action = agent.choose_action(observation)
            next_frame, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            frame_queue.pop()
            frame_queue.appendleft(preprocess_frame_dqn(next_frame))
            observation_ = np.concatenate(frame_queue, axis=2)
            observation = observation_
        scores.append(score)
        avg_score = np.mean(scores[-100:])
        print('episode: ', i, 'score: ', score,
              ' average score %.3f' % avg_score,
              'epsilon %.2f' % agent.epsilon, 'steps', n_steps)
        eps_history.append(agent.epsilon)
def main(args):
    env_name = args.env_name
    new_model = args.new_model
    num_games = args.num_games
    env = gym.make(env_name)
    env.seed(0)
    # terminating_steps = 250000
    # Set this to a very low number so it works for a variety of games;
    # should be set to the minimum score of the game.
    best_score = -9999.0
    agent = Agent(gamma=0.99, epsilon=1.0, alpha=0.0001,
                  input_dims=(104, 80, 4), n_actions=env.action_space.n,
                  mem_size=25000, eps_min=0.02, batch_size=32, replace=1000,
                  eps_dec=1e-5, env_name=env_name)
    if not new_model:
        try:
            agent.load_models()
        except:
            print('No DQN models found for %s in models folder' % env_name)
            raise
    scores, eps_history = [], []
    n_steps = 0
    for i in range(num_games):
        done = False
        observation = env.reset()
        frame_queue = deque(maxlen=4)
        observation = preprocess_frame_dqn(observation)
        for j in range(4):
            frame_queue.append(observation)
        observation = np.concatenate(frame_queue, axis=2)
        score = 0
        while not done:
            action = agent.choose_action(observation)
            next_frame, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            frame_queue.pop()
            frame_queue.appendleft(preprocess_frame_dqn(next_frame))
            observation_ = np.concatenate(frame_queue, axis=2)
            agent.store_transition(observation, action, reward,
                                   observation_, int(done))
            agent.learn()
            observation = observation_
        scores.append(score)
        avg_score = np.mean(scores[-100:])
        print('episode: ', i, 'score: ', score,
              ' average score %.3f' % avg_score,
              'epsilon %.2f' % agent.epsilon, 'steps', n_steps)
        if avg_score > best_score:
            agent.save_models()
            print('avg score %.2f better than best score %.2f, saving model'
                  % (avg_score, best_score))
            best_score = avg_score
        eps_history.append(agent.epsilon)
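# A minimal command-line entry point for the training routine above, assuming
# argparse flags that mirror the args attributes it reads (env_name, new_model,
# num_games). The original script's parser is not shown in this listing, so the
# flag names and defaults here are illustrative only.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train the DQN agent')
    parser.add_argument('--env_name', required=True,
                        help='Gym environment id, e.g. BreakoutDeterministic-v4')
    parser.add_argument('--new_model', action='store_true',
                        help='Train from scratch instead of loading saved weights')
    parser.add_argument('--num_games', type=int, default=1000,
                        help='Number of training episodes (illustrative default)')
    main(parser.parse_args())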
def generate_agent_episodes(args):
    full_path = ROLLOUT_DIR + '/rollout_' + args.env_name
    if not os.path.exists(full_path):
        os.umask(0o000)
        os.makedirs(full_path)
    env_name = args.env_name
    total_episodes = args.total_episodes
    time_steps = args.time_steps
    envs_to_generate = [env_name]
    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        env = gym.make(current_env_name)  # Create the environment
        env.seed(0)
        # First load the DQN agent and the predictive auto-encoder with their weights
        agent = Agent(gamma=0.99, epsilon=0.0, alpha=0.0001,
                      input_dims=(104, 80, 4), n_actions=env.action_space.n,
                      mem_size=25000, eps_min=0.0, batch_size=32,
                      replace=1000, eps_dec=1e-5, env_name=current_env_name)
        agent.load_models()
        predictor = load_predictive_model(current_env_name, env.action_space.n)
        s = 0
        while s < total_episodes:
            rollout_file = os.path.join(full_path, 'rollout-%d.npz' % s)
            observation = env.reset()
            frame_queue = deque(maxlen=4)
            dqn_queue = deque(maxlen=4)
            t = 0
            next_state_sequence = []
            correct_state_sequence = []
            total_reward = 0
            while t < time_steps:
                # Preprocess frames for the predictive model and the DQN
                converted_obs = preprocess_frame(observation)
                converted_obs_dqn = preprocess_frame_dqn(observation)
                if t == 0:
                    for i in range(4):
                        frame_queue.append(converted_obs)
                        dqn_queue.append(converted_obs_dqn)
                else:
                    frame_queue.pop()
                    dqn_queue.pop()
                    frame_queue.appendleft(converted_obs)
                    dqn_queue.appendleft(converted_obs_dqn)
                observation_states = np.concatenate(frame_queue, axis=2)
                dqn_states = np.concatenate(dqn_queue, axis=2)
                next_states = predictor.generate_output_states(
                    np.expand_dims(observation_states, axis=0))
                next_state_sequence.append(next_states)
                action = agent.choose_action(dqn_states)
                correct_state_sequence.append(
                    encode_action(env.action_space.n, action))
                observation, reward, done, info = env.step(action)  # Take the DQN agent's action
                total_reward += reward
                t = t + 1
            print("Episode {} finished after {} timesteps with reward {}".format(
                s, t, total_reward))
            np.savez_compressed(rollout_file,
                                next=next_state_sequence,
                                correct=correct_state_sequence)
            s = s + 1
        env.close()
def main(args):
    env_name = args.env_name
    total_episodes = args.total_episodes
    time_steps = args.time_steps
    informed = args.informed
    # action_refresh_rate = args.action_refresh_rate
    if informed:
        full_path = ROLLOUT_DIR + '/informed_rollout_' + args.env_name
    else:
        full_path = ROLLOUT_DIR + '/random_rollout_' + args.env_name
    if not os.path.exists(full_path):
        os.umask(0o000)
        os.makedirs(full_path)
    envs_to_generate = [env_name]
    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        env = gym.make(current_env_name)  # Create the environment
        env.seed(0)
        s = 0
        if informed:
            agent = load_dqn(env)
        while s < total_episodes:
            rollout_file = os.path.join(full_path, 'rollout-%d.npz' % s)
            observation = env.reset()
            frame_queue = deque(maxlen=4)
            dqn_queue = deque(maxlen=4)
            t = 0
            obs_sequence = []
            action_sequence = []
            next_sequence = []
            while t < time_steps:
                # Convert image to greyscale and downsize
                converted_obs = preprocess_frame(observation)
                if t == 0:
                    for i in range(4):
                        frame_queue.append(converted_obs)
                else:
                    frame_queue.pop()
                    frame_queue.appendleft(converted_obs)
                stacked_state = np.concatenate(frame_queue, axis=2)
                obs_sequence.append(stacked_state)
                if informed:
                    dqn_obs = preprocess_frame_dqn(observation)
                    if t == 0:
                        for i in range(4):
                            dqn_queue.append(dqn_obs)
                    else:
                        dqn_queue.pop()
                        dqn_queue.appendleft(dqn_obs)
                    stacked = np.concatenate(dqn_queue, axis=2)
                    action = agent.choose_action(stacked)
                else:
                    action = env.action_space.sample()
                action_sequence.append(
                    encode_action(env.action_space.n, action))
                observation, _, _, _ = env.step(action)  # Take the informed or random action
                t = t + 1
                next_sequence.append(preprocess_frame(observation))
            print("Episode {} finished after {} timesteps".format(s, t))
            np.savez_compressed(rollout_file,
                                obs=obs_sequence,
                                actions=action_sequence,
                                next_frame=next_sequence)
            s = s + 1
        env.close()
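# Hedged sketch of reading back a rollout file written by main above. load_rollout
# is a hypothetical helper, not part of the original scripts; the keys match the
# np.savez_compressed call, and each value is the corresponding per-step sequence.
def load_rollout(path):
    data = np.load(path)
    return data['obs'], data['actions'], data['next_frame']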