def __init__(self, initial):
    """Build the initial 9x9 candidate grid from the flat sequence ``initial``.

    Every entry equal to 0 becomes the candidate set {1, ..., 9}; any other
    entry ``k`` becomes the singleton set {k}. The resulting grid is passed
    through ``normalize_state`` and stored, flattened, as the tuple
    ``self.initial``.
    """
    cells = []
    for value in initial:
        # 0 marks an unknown cell: every digit is still a candidate.
        candidates = range(1, 10) if value == 0 else [value]
        cells.append(frozenset(candidates))

    grid = np.array(cells).reshape((9, 9))
    grid = normalize_state(grid)
    self.initial = tuple(grid.flatten())
def result(self, state, action):
    """Return the state obtained by applying ``action`` to ``state``.

    ``action`` is a triple ``(i, j, k)``: the cell at row ``i``, column ``j``
    is set to the singleton candidate set ``{k}``. The grid is then passed
    through ``normalize_state`` and returned flattened as a tuple.

    The input ``state`` is converted with ``numpify_state`` before being
    modified; that helper is presumably what yields a fresh array so the
    caller's state is left untouched -- TODO confirm against its definition.
    """
    grid = numpify_state(state)
    i, j, k = action
    # ndarray.itemset was removed in NumPy 2.0; indexed assignment stores the
    # same frozenset object in the cell and works on every NumPy version.
    grid[i, j] = frozenset([k])
    return tuple(normalize_state(grid).flatten())
# Play a saved DQN agent on the Boxing RAM environment and report the total
# reward of each episode.
env_name = "Boxing-ram-v0"

# Optional command-line arguments: argv[2] is the model file to load and
# argv[3] the number of episodes to play; defaults apply when they are absent.
try:
    model_filename = sys.argv[2]
except IndexError:
    model_filename = "Boxing-ram-v0_10000e.h5"

try:
    num_episodes = int(sys.argv[3])
except IndexError:
    num_episodes = 10

print("Playing agent {} for {} episodes.".format(model_filename, num_episodes))

env = gym.make(env_name)
try:
    # NOTE(review): epsilon=1.0 is passed as in the original; if DQNAgent
    # treats epsilon as an exploration rate this plays randomly -- confirm.
    agent = DQNAgent(env, epsilon=1.0, model_filename=model_filename)

    for i_episode in range(num_episodes):
        state = normalize_state(env.reset())
        done = False
        total_reward = 0

        while not done:
            env.render()
            action = agent.act(state)
            state, reward, done, info = env.step(action)
            state = normalize_state(state)
            total_reward += reward

        print("Episode {} reward: {}".format(i_episode + 1, total_reward))
finally:
    # Always release the environment (and its render window), even when
    # playing is interrupted or an episode raises.
    env.close()
num_episodes = int(sys.argv[2]) except IndexError: num_episodes = 2000 env = gym.make(env_name) agent = DQNAgent(env) for i_episode in range(num_episodes): # For timing every episode. ts_start = datetime.now() # For tracking accumulative reward. total_reward = 0 lives = env.env.ale.lives() state = normalize_state(env.reset()) done = False while not done: # Comment out env.render() for faster training. # env.render() action = agent.act(state) next_state, reward, done, info = env.step(action) next_state = normalize_state(next_state) # If a life is lost, pass terminal state if info["ale.lives"] < lives: lives = info["ale.lives"] done = True agent.remember(state, action, reward, next_state, done) state = next_state total_reward += reward