def run(env, solver, args):
    """Run multiple epochs as an experiment."""
    # Save all results in the experiment
    results = Results()

    print_divider('large')

    # Run for multiple epochs
    for epoch in tqdm.tqdm(range(args['n_epochs'])):
        # Show epoch progress
        if not args['quiet']:
            print_divider('medium')
            console(2, module, "Epoch: " + str(epoch + 1))

        epoch_start = time.time()

        # Play a game with policies solved by the solver
        game_history = _play_game(env, solver, args)

        # Record the results
        results.time.add(time.time() - epoch_start)
        results.update_reward_results(
            game_history.undiscounted_return(),
            game_history.discounted_return(args['discount']))

        if not args['quiet']:
            print_divider('medium')

    # Show the results
    results.show(args['n_epochs'])
    # Write the results to the log
    _log_result(results, args)
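# run() reads undiscounted_return() and discounted_return(discount) off the
# history returned by _play_game(). A minimal sketch of such a History,
# assuming it stores StepRecord objects exposing a `reward` field; the
# implementation below is an illustrative assumption, not the actual class.
class History:
    def __init__(self):
        self.records = []

    def append(self, step_record):
        self.records.append(step_record)

    def undiscounted_return(self):
        # Plain sum of rewards over the episode
        return sum(record.reward for record in self.records)

    def discounted_return(self, discount):
        # Geometric discounting: r_0 + discount * r_1 + discount^2 * r_2 + ...
        total = 0.0
        for t, record in enumerate(self.records):
            total += (discount ** t) * record.reward
        return total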
def show(self):
    console(3, module, 'State: ' + self.state.to_string())
    console(3, module, 'Action: ' + self.action.to_string())
    if not isinstance(self.obs, tuple):
        console(3, module, 'Observation: ' + self.obs.to_string())
    else:
        console(
            3, module,
            'Observation: ' + '; '.join(o.to_string() for o in self.obs))
    console(3, module, 'Reward: ' + str(self.reward))
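# show() above implies a per-step record carrying the state, action,
# observation, and reward, and the game loops below also read next_state and
# obs from it. A minimal sketch of such a container, assuming the state,
# action, and observation objects expose to_string(); the exact field layout
# is an assumption for illustration, not the library's definition.
class StepRecord:
    def __init__(self, state, action, obs, reward, next_state):
        self.state = state            # state the action was taken in
        self.action = action          # action chosen by the solver
        self.obs = obs                # observation, or a tuple of observations
        self.reward = reward          # immediate reward for this step
        self.next_state = next_state  # state reached after the step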
def play_game(self):
    """Play the entire game for one epoch."""
    state = self._env.initial_state()
    obs = self._env.initial_obs()

    # Get the first non-chance node as the root
    while state.is_chance():
        legal_actions, prob_list = state.chance_outcomes()
        action = np.random.choice(legal_actions, p=prob_list)
        step_record = self._env.step(state, action)
        state = step_record.next_state
        obs = step_record.obs

    # Set root node and the corresponding particle bin
    root = ObservationNode(obs, depth=0)
    for _ in range(self.n_start_states):
        possible_states, prob_list = self._env.possible_states(obs)
        particle = np.random.choice(possible_states, p=prob_list)
        root.particle_bin.append(particle)

    history = History()

    # Solve the game step by step until a terminal state
    while not state.is_terminal() and root.depth < self.max_depth:
        assert not state.is_chance()

        # Get an action by planning
        action = self._solve_one_step(root)

        # Get step result
        step_record = self._env.step(state, action)

        # Show the step
        if not self.quiet:
            print_divider('small')
            console(3, module, "Step: " + str(root.depth))
            step_record.show()

        history.append(step_record)
        state = step_record.next_state

        # Get the next non-chance node
        while state.is_chance():
            legal_actions, prob_list = state.chance_outcomes()
            chance_action = np.random.choice(legal_actions, p=prob_list)
            step_record = self._env.step(state, chance_action)
            state = step_record.next_state

        root = root.find_child(action).find_child(step_record.obs)

    return history
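# play_game() walks a search tree that alternates observation and action
# layers: root.find_child(action) returns an action node, and its
# find_child(obs) returns the next observation node, which becomes the new
# root. A minimal sketch of that structure, assuming children are kept in
# dictionaries keyed by hashable actions/observations; the real node classes
# likely also track visit counts and value estimates used by the planner.
class ActionNode:
    def __init__(self, action, depth):
        self.action = action
        self.depth = depth
        self.children = {}        # obs -> ObservationNode

    def find_child(self, obs):
        if obs not in self.children:
            self.children[obs] = ObservationNode(obs, depth=self.depth + 1)
        return self.children[obs]


class ObservationNode:
    def __init__(self, obs, depth):
        self.obs = obs
        self.depth = depth
        self.particle_bin = []    # sampled states consistent with obs
        self.children = {}        # action -> ActionNode

    def find_child(self, action):
        if action not in self.children:
            self.children[action] = ActionNode(action, depth=self.depth)
        return self.children[action]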
def show(self, n_epochs):
    print_divider('large')
    console(2, module, 'epochs: %d' % n_epochs)
    console(
        2, module, 'ave undiscounted return/epoch: %.3f +- %.3f' %
        (self.undiscounted_return.mean, self.undiscounted_return.std_err()))
    console(
        2, module, 'ave discounted return/epoch: %.3f +- %.3f' %
        (self.discounted_return.mean, self.discounted_return.std_err()))
    console(2, module, 'ave time/epoch: %.3f' % self.time.mean)
    print_divider('medium')
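# Results.show() reads .mean and .std_err() from accumulators such as
# self.time and self.undiscounted_return. A minimal sketch of such a running
# statistic, assuming std_err() is the sample standard deviation divided by
# sqrt(n); the actual accumulator used by Results may be implemented
# differently (e.g. incrementally).
import math


class Statistic:
    def __init__(self):
        self.samples = []

    def add(self, value):
        self.samples.append(value)

    @property
    def mean(self):
        # Property, matching the attribute-style access in Results.show()
        return sum(self.samples) / len(self.samples) if self.samples else 0.0

    def std_err(self):
        n = len(self.samples)
        if n < 2:
            return 0.0
        m = self.mean
        variance = sum((x - m) ** 2 for x in self.samples) / (n - 1)
        return math.sqrt(variance / n)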
def run(solver, args):
    """Run multiple epochs as an experiment."""
    print('Environment: %s, Solver: %s' % (args['env'], args['solver']))
    print_divider('large')

    if solver.online:
        # Save all results in the experiment
        results = Results()

        # Run for multiple epochs
        for epoch in tqdm.tqdm(range(args['n_epochs']), unit='epoch'):
            # Show epoch progress
            if not args['quiet']:
                print_divider('medium')
                console(2, module, "Epoch: " + str(epoch + 1))

            epoch_start = time.time()

            # Play a game for online solvers
            game_history = _play_game(solver)

            # Record the results
            results.time.add(time.time() - epoch_start)
            results.update_reward_results(
                game_history.get_return(),
                game_history.get_return(args['discount']))

            if not args['quiet']:
                print_divider('medium')

        # Show the results
        results.show(args['n_epochs'])
        # Write the results to the log
        _log_result(results, args)
        return results
    else:
        # Train the policy offline
        policy = _train_policy(solver)
        return policy
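# A hedged usage sketch for run(): the keys 'env', 'solver', 'n_epochs',
# 'discount', and 'quiet' all appear in the code above, but the concrete
# values and the make_solver factory below are hypothetical placeholders for
# whatever the surrounding project actually provides.
if __name__ == '__main__':
    args = {
        'env': 'tiger',       # hypothetical environment name
        'solver': 'pomcp',    # hypothetical solver name
        'n_epochs': 100,
        'discount': 0.95,
        'quiet': True,
    }
    solver = make_solver(args)  # hypothetical factory, not defined above
    run(solver, args)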
def solve_game(self, env):
    """Solve the entire game for one epoch."""
    state = env.new_initial_state()
    obs = state.initial_obs()

    # Set root node and the corresponding particle bin
    root = ObservationNode(obs, depth=0)
    for _ in range(self.n_start_states):
        # Rejection-sample initial states whose first observation matches obs
        particle = env.new_initial_state()
        while particle.initial_obs() != obs:
            particle = env.new_initial_state()
        root.particle_bin.append(particle)

    history = History()

    # Solve the game step by step until a terminal state
    while not state.is_terminal():
        # Get an action by planning
        action = self._solve_one_step(root, env)

        # Get step result
        step_record = env.step(state, action)

        # Show the step
        if not self.quiet:
            print_divider('small')
            console(3, module, "Step: " + str(root.depth))
            step_record.show()

        history.append(step_record)
        state = step_record.next_state
        root = root.find_child(action).find_child(step_record.obs)

    return history
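# A hedged usage sketch for one epoch driven by solve_game(): get_return()
# matches the history API used by run() above, while make_env and PomcpSolver
# are hypothetical placeholders for the project's actual environment factory
# and solver class.
env = make_env('tiger')                                # hypothetical factory
solver = PomcpSolver(n_start_states=200, quiet=True)   # hypothetical constructor
history = solver.solve_game(env)
undiscounted = history.get_return()
discounted = history.get_return(0.95)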