def main(args):
    """Train a ``StateAgent`` on previously recorded rollouts and save it.

    Reads ``env_name``, ``N``, ``epochs`` and ``time_steps`` from ``args``,
    imports the rollout data from ``ROLLOUT_DIR/rollout_<env_name>/``,
    trains the agent on it and finally saves the agent's weights.

    Args:
        args: Object exposing the attributes ``env_name``, ``N``,
            ``epochs`` and ``time_steps`` (presumably an argparse
            namespace -- confirm against the caller).

    Raises:
        Exception: Whatever ``StateAgent`` raises during construction is
            re-raised after a diagnostic message has been printed.
    """
    dir_name = ROLLOUT_DIR + "/rollout_" + args.env_name + "/"
    episodes = args.N
    action_dim = action_space_dimension(args.env_name)
    epochs = args.epochs
    time_steps = args.time_steps

    next_states, correct_state = import_data(
        episodes, action_dim, dir_name, time_steps)

    try:
        agent = StateAgent(action_dim, args.env_name)
    except Exception:
        # Only genuine errors are reported; KeyboardInterrupt / SystemExit
        # propagate untouched instead of being labelled as missing data.
        print('NO DATA FOUND')
        raise

    agent.train(next_states, correct_state, epochs)
    agent.save_weights()
def test_against_environment(env_name, num_runs, agent_name):
    """Run an agent for ``num_runs`` episodes in a gym environment.

    ``agent_name`` selects the policy:

    * ``'Next_agent'`` -- a ``StateAgent`` that picks its action from the
      next states generated by the predictive model.
    * ``'DQN'`` -- an ``Agent`` whose models are loaded via
      ``load_models()``.
    * anything else -- actions sampled from ``env.action_space``.

    Args:
        env_name: Id passed to ``gym.make``.
        num_runs: Number of episodes to play.
        agent_name: Policy selector, see above.

    Returns:
        A tuple ``(rewards, time_taken)`` where ``rewards`` is the list of
        per-episode total rewards and ``time_taken`` is the elapsed
        wall-clock time divided by the total number of environment steps
        (``0.0`` when no episode was run).

    Raises:
        Exception: If loading the predictive model or the agent fails
            while ``agent_name`` is ``'Next_agent'`` or ``'DQN'``; the
            original error is re-raised after a diagnostic is printed.
    """
    env = gym.make(env_name)
    # env.seed(0)
    uses_next_state = agent_name == 'Next_agent'
    uses_dqn = agent_name == 'DQN'

    try:
        predictor = None
        agent = None
        try:
            predictor = load_predictive_model(env_name, env.action_space.n)
            if uses_next_state:
                agent = StateAgent(env.action_space.n, env_name)
                agent.set_weights()
            elif uses_dqn:
                agent = Agent(gamma=0.99,
                              epsilon=0.00,
                              alpha=0.0001,
                              input_dims=(104, 80, 4),
                              n_actions=env.action_space.n,
                              mem_size=25000,
                              eps_min=0.00,
                              batch_size=32,
                              replace=1000,
                              eps_dec=1e-5,
                              env_name=env_name)
                agent.load_models()
        except Exception:
            print(
                "Error loading model, check environment name and action space dimensions"
            )
            # The random-action path needs neither model, so it may carry
            # on; the other two would only fail later with an unrelated
            # NameError/AttributeError, so surface the real error now.
            if uses_next_state or uses_dqn:
                raise

        rewards = []
        start = time.time()
        total_steps = 0.0

        for i in range(num_runs):
            frame_queue = deque(maxlen=4)
            observation = env.reset()
            done = False
            if uses_dqn:
                init_queue(frame_queue, observation, True)
            else:
                init_queue(frame_queue, observation)

            total_reward = 0.0
            frame_count = 0
            while not done:
                observation_states = np.concatenate(frame_queue, axis=2)

                if uses_next_state:
                    # Human start of breakout since the next state agent
                    # just keeps moving to the left: force action 1 on the
                    # first frame of the episode.
                    if (env_name == 'BreakoutDeterministic-v4'
                            and not frame_count):
                        agent_action = 1
                    else:
                        next_states = predictor.generate_output_states(
                            np.expand_dims(observation_states, axis=0))
                        agent_action = agent.choose_action_from_next_states(
                            np.expand_dims(next_states, axis=0))
                elif uses_dqn:
                    agent_action = agent.choose_action(observation_states)
                else:
                    agent_action = env.action_space.sample()

                observation, reward, done, _ = env.step(agent_action)
                total_reward += reward
                frame_count += 1
                total_steps += 1

                # Discard the rightmost frame and push the newest frame on
                # the left, keeping the queue length constant.
                frame_queue.pop()
                if uses_dqn:
                    frame_queue.appendleft(preprocess_frame_dqn(observation))
                else:
                    frame_queue.appendleft(preprocess_frame(observation))

            print("Completed episode {} with reward {}".format(
                i + 1, total_reward))
            rewards.append(total_reward)

        end = time.time()

        if not rewards:
            # No episode was played: there is nothing to average and no
            # step count to divide the elapsed time by.
            print("Test complete - no episodes were run")
            return (rewards, 0.0)

        time_taken = (end - start) / total_steps
        print("Test complete - Average score: {} Max score: {}".format(
            np.average(rewards), np.max(rewards)))
        return (rewards, time_taken)
    finally:
        # Release the environment's resources on every exit path.
        env.close()