def observe(self):
    """Return an ``Observation`` built from the wrapped game's current state.

    The raw game observation is featurized, flattened into a single array
    and written back under the ``OBSERVATION`` key of the mapping returned
    by ``self.game.observe()`` before being packaged.

    Returns:
        Observation: populated with ``id``, ``reward``, ``state`` (the
        flattened feature array) and ``is_episode_over`` (the result of
        ``self.game.is_over()``).
    """
    snapshot = self.game.observe()

    # Logic borrowed from:
    # https://github.com/facebook/MazeBase/blob/23454fe092ecf35a8aab4da4972f231c6458209b/py/example.py#L192
    grid, side_info = snapshot[OBSERVATION]

    # The featurizer return values are ignored; presumably both calls
    # modify their argument in place -- TODO confirm against `featurizers`.
    featurizers.grid_one_hot(self.game, grid)
    grid = np.array(grid)
    featurizers.vocabify(self.game, side_info)

    # NOTE(review): the vocabified side info is discarded here and replaced
    # by a copy of the grid array, so the concatenation below joins the grid
    # with itself along axis 2. This mirrors the upstream example and may be
    # unintended (`np.array(side_info)`?). Confirm the side-info shape is
    # compatible with an axis-2 concatenation before changing it.
    side_info = np.array(grid)

    flat_state = np.concatenate((grid, side_info), 2).flatten()
    snapshot[OBSERVATION] = flat_state

    episode_done = self.game.is_over()
    return Observation(
        id=snapshot[ID],
        reward=snapshot[REWARD],
        state=snapshot[OBSERVATION],
        is_episode_over=episode_done,
    )
# Interactive play loop: repeatedly shows the game, prints reward statistics,
# asks `action_func` to choose among the currently possible actions and
# applies the choice. `game`, `action_func`, `pp`, `sleep`, `system`,
# `featurizers` and `np` are expected to be defined earlier in the file.
frame = 0
game.display()
sleep(.1)
system('clear')
while True:
    # Current reward, cumulative reward, and the game's own estimate of the
    # best achievable reward.
    print("r: {}\ttr: {} \tguess: {}".format(game.reward(), game.reward_so_far(), game.approx_best_reward()))
    config = game.observe()
    # Pretty-print the second element of the observation pair.
    pp.pprint(config['observation'][1])
    # Uncomment this to featurize into one-hot vectors
    # NOTE(review): in this view the featurization lines below are live, not
    # commented out -- confirm that is intended.
    obs, info = config['observation']
    # Return values are ignored; presumably the featurizers modify their
    # argument in place -- TODO confirm.
    featurizers.grid_one_hot(game, obs)
    obs = np.array(obs)
    featurizers.vocabify(game, info)
    # NOTE(review): this converts `obs` again, so the vocabified `info` is
    # dropped and both tuple elements hold the grid array. Possibly meant
    # `np.array(info)` -- confirm before changing.
    info = np.array(obs)
    config['observation'] = obs, info
    game.display()
    # NOTE(review): `id` shadows the builtin and is not read again in the
    # visible code.
    id = game.current_agent()
    actions = game.all_possible_actions()
    action = action_func(actions)
    game.act(action)
    # Short pause so the frame is visible, then clear the terminal.
    sleep(.1)
    system('clear')
    print("\n")
    frame += 1
    # Restart the frame counter when the episode ends or after more than 300
    # frames. The branch may continue beyond the visible chunk.
    if game.is_over() or frame > 300:
        frame = 0