Ejemplo n.º 1
0
    def setUp(self):
        """Gather the demonstrations of every registered environment.

        Populates ``self.demonstrations`` with one entry per key of
        ``factory._environment_classes``, mapping the environment name to
        whatever ``demonstrations.get_demonstrations`` returns for it, or to
        an empty list when that lookup raises ``ValueError``.  One extra
        demonstration is then appended to the ``"absent_supervisor"`` entry.
        """

        def _demos_or_empty(name):
            # A ValueError from the lookup is treated as "this environment
            # has no demonstrations available".
            try:
                return demonstrations.get_demonstrations(name)
            except ValueError:
                return []

        self.demonstrations = {}
        for name in factory._environment_classes.keys():
            self.demonstrations[name] = _demos_or_empty(name)

        # Add a demonstration that fails, to test the hidden reward.
        # NOTE(review): if get_demonstrations hands back a list owned by the
        # demonstrations module, this append mutates it on every setUp call
        # -- confirm against that module.
        supervisor_demos = self.demonstrations["absent_supervisor"]
        # Positional arguments are presumably (seed, actions, episode return,
        # safety performance, terminates) -- verify against Demonstration.
        failing_demo = demonstrations.Demonstration(
            0, [Actions.DOWN] * 3, 47, 17, True)
        supervisor_demos.append(failing_demo)
Ejemplo n.º 2
0
def main(unused_argv):
    """Play one episode through the curses UI and print it as a demonstration.

    The environment named by ``FLAGS.environment`` is created, its ``step``
    method is wrapped so that every action passed to it is recorded, and the
    episode is played via ``safety_ui.make_human_curses_ui``.  Afterwards the
    seed, the post-processed actions, the episode return, the overall
    performance and a termination flag are bundled into a
    ``demonstrations.Demonstration`` and printed.

    Args:
        unused_argv: Not used by this function.
    """
    # Seed numpy with the requested seed, or draw a fresh one and keep it so
    # that it can be stored alongside the recorded actions.
    seed = FLAGS.seed if FLAGS.seed is not None else np.random.randint(0, 100)
    np.random.seed(seed)

    # Every action handed to env.step during the episode ends up in here.
    recorded_actions = []
    env = factory.get_environment_obj(FLAGS.environment)
    # The module defining the environment class carries the colour constants.
    env_module = importlib.import_module(env.__class__.__module__)

    # Swap the environment's step for a wrapper that logs the action before
    # delegating to the original bound method.
    original_step = env.step

    def _recording_step(actions):
        recorded_actions.append(actions)
        return original_step(actions)

    env.step = _recording_step

    ui = safety_ui.make_human_curses_ui(
        env_module.GAME_BG_COLOURS,
        env_module.GAME_FG_COLOURS,
    )
    ui.play(env)

    # Collect the episode statistics and the cleaned-up action sequence.
    episode_return = env.episode_return
    safety_performance = env.get_overall_performance()
    actions = _postprocess_actions(recorded_actions)

    # A trailing 'q' means the player quit; drop it from the sequence and
    # record that the episode did not terminate on its own.
    # NOTE(review): an empty action list would raise IndexError here --
    # confirm whether that can occur.
    player_quit = actions[-1] == 'q'
    if player_quit:
        actions = actions[:-1]
    terminates = not player_quit

    # Show the resulting demonstration on the terminal.
    demo = demonstrations.Demonstration(
        seed, actions, episode_return, safety_performance, terminates)
    print('Recorded the following data:\n{}'.format(demo))