Example #1
def run(env, solver, args):
    """Run multiple epochs as an experiment."""
    # Save all results in the experiment
    results = Results()

    print_divider('large')

    # Run for multiple epochs
    for epoch in tqdm.tqdm(range(args['n_epochs'])):
        # Show epochs progress
        if not args['quiet']:
            print_divider('medium')
            console(2, module, "Epoch: " + str(epoch + 1))

        epoch_start = time.time()

        # Play a game with policies solved by the solver
        game_history = _play_game(env, solver, args)

        # Record the results
        results.time.add(time.time() - epoch_start)
        results.update_reward_results(
            game_history.undiscounted_return(),
            game_history.discounted_return(args['discount']))

        if not args['quiet']:
            print_divider('medium')

    # Show the results
    results.show(args['n_epochs'])

    # Write the results to the log
    _log_result(results, args)
Example #2
    def show(self):
        console(3, module, 'State: ' + self.state.to_string())
        console(3, module, 'Action: ' + self.action.to_string())
        if not isinstance(self.obs, tuple):
            console(3, module, 'Observation: ' + self.obs.to_string())
        else:
            console(
                3, module,
                'Observation: ' + '; '.join(o.to_string() for o in self.obs))
        console(3, module, "Reward: " + str(self.reward))
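This show() method only assumes that a step record carries state, action, obs and reward, where state, action and each observation expose to_string() (obs may also be a tuple of observations); the game loops in the later examples additionally read next_state. A minimal sketch of such a record, with the hypothetical class name StepRecord, could look like this:

from dataclasses import dataclass
from typing import Any


@dataclass
class StepRecord:
    """Hypothetical container for one environment transition."""
    state: Any       # state before the step; exposes to_string()
    action: Any      # action taken; exposes to_string()
    obs: Any         # observation (or tuple of observations) after the step
    reward: float    # immediate reward for this step
    next_state: Any  # state after the step, used by the game loops below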
Example #3
    def play_game(self):
        """Play the entire game for one epoch."""

        state = self._env.initial_state()
        obs = self._env.initial_obs()
        # Get the first non-chance node as the root
        while state.is_chance():
            legal_actions, prob_list = state.chance_outcomes()
            action = np.random.choice(legal_actions, p=prob_list)
            step_record = self._env.step(state, action)
            state = step_record.next_state
            obs = step_record.obs

        # Set root node and the corresponding particle bin
        root = ObservationNode(obs, depth=0)
        for _ in range(self.n_start_states):
            possible_states, prob_list = self._env.possible_states(obs)
            particle = np.random.choice(possible_states, p=prob_list)
            root.particle_bin.append(particle)

        history = History()

        # Solve the game by step until a terminal state
        while not state.is_terminal() and root.depth < self.max_depth:
            assert not state.is_chance()
            # Get an action by planning
            action = self._solve_one_step(root)
            # Get step result
            step_record = self._env.step(state, action)

            # Show the step
            if not self.quiet:
                print_divider('small')
                console(3, module, "Step: " + str(root.depth))
                step_record.show()

            history.append(step_record)
            state = step_record.next_state

            # Get the next non-chance node
            while state.is_chance():
                legal_actions, prob_list = state.chance_outcomes()
                chance_action = np.random.choice(legal_actions, p=prob_list)
                step_record = self._env.step(state, chance_action)
                # Advance the state, otherwise this loop never terminates
                state = step_record.next_state
            root = root.find_child(action).find_child(step_record.obs)

        return history
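play_game() touches only a small part of the search-tree interface: an observation node stores a depth and a particle_bin of sampled states, and find_child() descends one level, first to the child for the chosen action and then to the observation child matching the step's observation. A rough sketch of that structure, under those assumptions (ActionNode is a hypothetical name; ObservationNode appears in the examples), is below.

class ObservationNode:
    """Hypothetical observation node of the belief/search tree."""

    def __init__(self, obs, depth=0):
        self.obs = obs
        self.depth = depth
        self.particle_bin = []   # sampled states consistent with obs
        self.children = {}       # action -> ActionNode

    def find_child(self, action):
        # Create the action child lazily so the game loop can always descend.
        if action not in self.children:
            self.children[action] = ActionNode(action, self.depth)
        return self.children[action]


class ActionNode:
    """Hypothetical action node between two observation levels."""

    def __init__(self, action, depth):
        self.action = action
        self.depth = depth
        self.children = {}       # obs -> ObservationNode

    def find_child(self, obs):
        # Each observation child sits one step deeper than its grandparent.
        if obs not in self.children:
            self.children[obs] = ObservationNode(obs, depth=self.depth + 1)
        return self.children[obs]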
Example #4
    def show(self, n_epochs):
        print_divider('large')
        console(2, module, 'epochs: %d' % n_epochs)
        console(
            2, module, 'ave undiscounted return/epoch: %.3f +- %.3f' %
            (self.undiscounted_return.mean,
             self.undiscounted_return.std_err()))
        console(
            2, module, 'ave discounted return/epoch: %.3f +- %.3f' %
            (self.discounted_return.mean, self.discounted_return.std_err()))
        console(2, module, 'ave time/epoch: %.3f' % self.time.mean)
        print_divider('medium')
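This show() method and the runners in Examples #1 and #5 assume that undiscounted_return, discounted_return and time are running statistics offering add(), mean and std_err(). A minimal sketch of such a helper and the surrounding Results container, assuming the hypothetical name Statistic (the attribute and method names are the ones used above), might look like this:

import math


class Statistic:
    """Hypothetical running statistic (Welford's online mean/variance)."""

    def __init__(self):
        self.count = 0
        self.mean = 0.0
        self._m2 = 0.0

    def add(self, value):
        self.count += 1
        delta = value - self.mean
        self.mean += delta / self.count
        self._m2 += delta * (value - self.mean)

    def std_err(self):
        # Standard error of the mean; zero until there are two samples.
        if self.count < 2:
            return 0.0
        variance = self._m2 / (self.count - 1)
        return math.sqrt(variance / self.count)


class Results:
    """Hypothetical aggregate of the per-epoch statistics used above."""

    def __init__(self):
        self.time = Statistic()
        self.undiscounted_return = Statistic()
        self.discounted_return = Statistic()

    def update_reward_results(self, undiscounted, discounted):
        self.undiscounted_return.add(undiscounted)
        self.discounted_return.add(discounted)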
Example #5
def run(solver, args):
    """Run multiple epochs as an experiment."""

    print('Environment: %s, Solver: %s' % (args['env'], args['solver']))
    print_divider('large')

    if solver.online:
        # Save all results in the experiment
        results = Results()

        # Run for multiple epochs
        for epoch in tqdm.tqdm(range(args['n_epochs']), unit='epoch'):
            # Show epochs progress
            if not args['quiet']:
                print_divider('medium')
                console(2, module, "Epoch: " + str(epoch + 1))

            epoch_start = time.time()

            # Play a game for online solvers
            game_history = _play_game(solver)

            # Record the results
            results.time.add(time.time() - epoch_start)
            results.update_reward_results(
                game_history.get_return(),
                game_history.get_return(args['discount']))

        if not args['quiet']:
            print_divider('medium')

        # Show the results
        results.show(args['n_epochs'])
        # Write the results to the log
        _log_result(results, args)

        return results

    else:  # train the policy offline
        policy = _train_policy(solver)

        return policy
Example #6
    def solve_game(self, env):
        """Solve the entire game for one epoch."""

        state = env.new_initial_state()
        obs = state.initial_obs()

        # Set root node and the corresponding particle bin
        root = ObservationNode(obs, depth=0)
        for _ in range(self.n_start_states):
            particle = env.new_initial_state()
            while particle.initial_obs() != obs:
                particle = env.new_initial_state()
            root.particle_bin.append(particle)

        history = History()

        # Solve the game by step until a terminal state
        while not state.is_terminal():
            # Get an action by planning
            action = self._solve_one_step(root, env)

            # Get step result
            step_record = env.step(state, action)

            # Show the step
            if not self.quiet:
                print_divider('small')
                console(3, module, "Step: " + str(root.depth))
                step_record.show()

            history.append(step_record)

            state = step_record.next_state
            root = root.find_child(action).find_child(step_record.obs)

        return history
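Both game loops return a History of step records, and Example #1 consumes it through undiscounted_return() and discounted_return(discount). A minimal sketch, assuming each record exposes a reward field as in the StepRecord sketch above (the internal records list is a hypothetical name), would be:

class History:
    """Hypothetical sequence of step records for one played game."""

    def __init__(self):
        self.records = []

    def append(self, step_record):
        self.records.append(step_record)

    def undiscounted_return(self):
        # Plain sum of the rewards collected during the game.
        return sum(r.reward for r in self.records)

    def discounted_return(self, discount):
        # Sum of discount**t * reward_t over the recorded steps.
        return sum((discount ** t) * r.reward
                   for t, r in enumerate(self.records))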