コード例 #1
0
 def observe(self):
     """Build an ``Observation`` from the wrapped game's current state.

     The raw observation from ``self.game.observe()`` is split into its
     grid and side-information parts, each is passed through the matching
     featurizer, and the flattened result is written back into the raw
     observation before being packed into an ``Observation``.

     Returns:
         Observation: built from the raw observation's ``ID`` and
         ``REWARD`` entries, the flattened state array, and the value of
         ``self.game.is_over()``.
     """
     raw = self.game.observe()

     # Logic borrowed from:
     # https://github.com/facebook/MazeBase/blob/23454fe092ecf35a8aab4da4972f231c6458209b/py/example.py#L192
     grid, side_info = raw[OBSERVATION]

     # The featurizers' return values are ignored here, so they are
     # presumably mutating their argument in place -- TODO confirm.
     featurizers.grid_one_hot(self.game, grid)
     grid = np.array(grid)
     featurizers.vocabify(self.game, side_info)

     # NOTE(review): this converts `grid` a second time instead of the
     # vocabified `side_info`, so the state below is the grid concatenated
     # with a copy of itself and the side information never reaches it.
     # It mirrors the upstream example and looks like a typo, but switching
     # to np.array(side_info) would change the size of the returned state
     # (and may not be concatenable along axis 2) -- confirm before changing.
     side_info = np.array(grid)

     stacked = np.concatenate((grid, side_info), 2)
     raw[OBSERVATION] = stacked.flatten()

     episode_done = self.game.is_over()
     return Observation(
         id=raw[ID],
         reward=raw[REWARD],
         state=raw[OBSERVATION],
         is_episode_over=episode_done,
     )
コード例 #2
0
# Interactive demo loop: repeatedly print the rewards, show the observation,
# pick an action with `action_func` and apply it to `game`.
# `frame` counts the steps taken since the counter was last reset.
frame = 0
# Draw the initial state, pause briefly, then run `clear` to wipe the screen.
game.display()
sleep(.1)
system('clear')
while True:
    # Status line: current reward, reward so far, and the game's own
    # approximate best reward.
    print("r: {}\ttr: {} \tguess: {}".format(game.reward(),
                                             game.reward_so_far(),
                                             game.approx_best_reward()))
    config = game.observe()
    # Pretty-print the second element of the observation (unpacked as `info`
    # just below).
    pp.pprint(config['observation'][1])
    # Featurize the observation. The featurizers' return values are ignored,
    # so they presumably modify `obs` / `info` in place -- TODO confirm.
    obs, info = config['observation']
    featurizers.grid_one_hot(game, obs)
    obs = np.array(obs)
    featurizers.vocabify(game, info)
    # NOTE(review): this rebuilds `info` from `obs`, discarding the vocabified
    # side information; it looks like a typo for np.array(info) -- confirm the
    # shape vocabify produces before changing it.
    info = np.array(obs)
    config['observation'] = obs, info
    game.display()

    # NOTE(review): `id` shadows the builtin and is not read anywhere in the
    # visible part of this loop.
    id = game.current_agent()
    # Let `action_func` choose among all currently possible actions.
    actions = game.all_possible_actions()
    action = action_func(actions)
    game.act(action)

    # Pause so the frame is visible, then clear the screen for the next one.
    sleep(.1)
    system('clear')
    print("\n")
    frame += 1
    # Restart the frame counter when the game reports it is over or after
    # more than 300 frames.
    if game.is_over() or frame > 300:
        frame = 0